author     Dimitry Andric <dim@FreeBSD.org>    2017-02-17 19:35:08 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2017-02-17 19:35:08 +0000
commit     c60b95818e4f6c00c872114318d01109f97a7fa3
tree       9361932ed8cde0f9d3c167adb0eb75ff1401ed99
parent     3897d3b845ab73af1f4abd7fd8cc6e43925af1b4
28 files changed, 863 insertions, 250 deletions
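
The release notes in this diff introduce experimental coroutine support, enabled either with ``-enable-coroutines`` in opt or through the pass-manager extension-point API. A minimal sketch of the C++ route, assuming the LLVM 4.0 legacy pass-manager headers (the driver function itself is hypothetical):

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Transforms/Coroutines.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"

    // Hook the coroutine passes (CoroEarly/CoroSplit/CoroElide/CoroCleanup)
    // into a standard -O2 pipeline via the extension-point mechanism.
    void buildCoroPipeline(llvm::legacy::PassManager &MPM) {
      llvm::PassManagerBuilder Builder;
      Builder.OptLevel = 2;
      llvm::addCoroutinePassesToExtensionPoints(Builder);
      Builder.populateModulePassManager(MPM);
    }

From the command line, the equivalent would be along the lines of ``opt -enable-coroutines -O2 module.bc``.
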
diff --git a/docs/Extensions.rst b/docs/Extensions.rst
index 850c42750911..2b12123cdf68 100644
--- a/docs/Extensions.rst
+++ b/docs/Extensions.rst
@@ -61,7 +61,7 @@ types ``IMAGE_REL_I386_SECREL`` (32-bit) or ``IMAGE_REL_AMD64_SECREL`` (64-bit).
 the target. It corresponds to the COFF relocation types
 ``IMAGE_REL_I386_SECTION`` (32-bit) or ``IMAGE_REL_AMD64_SECTION`` (64-bit).
 
-.. code-block:: gas
+.. code-block:: none
 
   .section .debug$S,"rn"
   .long 4
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index 5ac17015953e..e93a02f6b023 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -3997,7 +3997,7 @@ DIFile
 
 ``DIFile`` nodes represent files. The ``filename:`` can include slashes.
 
-.. code-block:: llvm
+.. code-block:: none
 
   !0 = !DIFile(filename: "path/to/file", directory: "/path/to/dir",
                checksumkind: CSK_MD5,
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index aef11daf194b..da86be3f96ff 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -61,6 +61,9 @@ Non-comprehensive list of changes in this release
    with LLVM option -adce-remove-loops when the loop body otherwise has no
    live operations.
 
+* The llvm-cov tool can now export coverage data as json. Its html output mode
+  has also improved.
+
 * ... next change ...
 
 .. NOTE
@@ -81,6 +84,37 @@ Non-comprehensive list of changes in this release
 
 * Significant build-time and binary-size improvements when compiling with
   debug info (-g).
 
+LLVM Coroutines
+---------------
+
+Experimental support for :doc:`Coroutines` was added, which can be enabled
+with ``-enable-coroutines`` in the ``opt`` command tool or by using the
+``addCoroutinePassesToExtensionPoints`` API when building the optimization
+pipeline.
+
+For more information on LLVM Coroutines and the LLVM implementation, see
+`2016 LLVM Developers’ Meeting talk on LLVM Coroutines
+<http://llvm.org/devmtg/2016-11/#talk4>`_.
+
+Regcall and Vectorcall Calling Conventions
+--------------------------------------------------
+
+Support was added for the __regcall calling convention.
+Existing __vectorcall calling convention support was extended to include
+correct handling of HVAs.
+
+The __vectorcall calling convention was introduced by Microsoft to
+enhance register usage when passing parameters.
+For more information please read `__vectorcall documentation
+<https://msdn.microsoft.com/en-us/library/dn375768.aspx>`_.
+
+The __regcall calling convention was introduced by Intel to
+optimize parameter transfer on function call.
+This calling convention ensures that as many values as possible are
+passed or returned in registers.
+For more information please read `__regcall documentation
+<https://software.intel.com/en-us/node/693069>`_.
+
 Code Generation Testing
 -----------------------
@@ -258,6 +292,21 @@ External Open Source Projects Using LLVM 4.0.0
 
 * A project...
 
+LDC - the LLVM-based D compiler
+-------------------------------
+
+`D <http://dlang.org>`_ is a language with C-like syntax and static typing. It
+pragmatically combines efficiency, control, and modeling power, with safety and
+programmer productivity. D supports powerful concepts like Compile-Time Function
+Execution (CTFE) and Template Meta-Programming, provides an innovative approach
+to concurrency and offers many classical paradigms.
+
+`LDC <http://wiki.dlang.org/LDC>`_ uses the frontend from the reference compiler
+combined with LLVM as backend to produce efficient native code. LDC targets
+x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on ARM
+and PowerPC (32/64 bit).
+Ports to other architectures like AArch64 and MIPS64
+are underway.
+
 Additional Information
 ======================
diff --git a/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h b/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
index cb4a16cb5b7b..0cc3b26e9659 100644
--- a/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
+++ b/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
@@ -31,6 +31,23 @@ class StringRef;
 class LLVMContext;
 class TargetMachine;
 
+/// Wrapper around MemoryBufferRef, owning the identifier
+class ThinLTOBuffer {
+  std::string OwnedIdentifier;
+  StringRef Buffer;
+
+public:
+  ThinLTOBuffer(StringRef Buffer, StringRef Identifier)
+      : OwnedIdentifier(Identifier), Buffer(Buffer) {}
+
+  MemoryBufferRef getMemBuffer() const {
+    return MemoryBufferRef(Buffer,
+                           {OwnedIdentifier.c_str(), OwnedIdentifier.size()});
+  }
+  StringRef getBuffer() const { return Buffer; }
+  StringRef getBufferIdentifier() const { return OwnedIdentifier; }
+};
+
 /// Helper to gather options relevant to the target machine creation
 struct TargetMachineBuilder {
   Triple TheTriple;
@@ -280,7 +297,7 @@ private:
 
   /// Vector holding the input buffers containing the bitcode modules to
   /// process.
-  std::vector<MemoryBufferRef> Modules;
+  std::vector<ThinLTOBuffer> Modules;
 
   /// Set of symbols that need to be preserved outside of the set of bitcode
   /// files.
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index fd156fa7dd07..2c7bffe76503 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13072,9 +13072,15 @@ SDValue DAGCombiner::createBuildVecShuffle(SDLoc DL, SDNode *N,
         !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
       return SDValue();
 
-    if (InVT1 != InVT2)
+    // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
+    // lower it back into a BUILD_VECTOR. So if the inserted type is
+    // illegal, don't even try.
+    if (InVT1 != InVT2) {
+      if (!TLI.isTypeLegal(InVT2))
+        return SDValue();
       VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
                            DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
+    }
     ShuffleNumElems = NumElems * 2;
   } else {
     // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 3b91e58879b4..4a9042cfb3f4 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -502,8 +502,17 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
     const TargetRegisterClass *TRC =
       TLI->getRegClassFor(Node->getSimpleValueType(0));
 
-    unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
-    MachineInstr *DefMI = MRI->getVRegDef(VReg);
+    unsigned Reg;
+    MachineInstr *DefMI;
+    RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0));
+    if (R && TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+      Reg = R->getReg();
+      DefMI = nullptr;
+    } else {
+      Reg = getVR(Node->getOperand(0), VRBaseMap);
+      DefMI = MRI->getVRegDef(Reg);
+    }
+
     unsigned SrcReg, DstReg, DefSubIdx;
     if (DefMI &&
         TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
@@ -519,20 +528,26 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
               TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg);
       MRI->clearKillFlags(SrcReg);
     } else {
-      // VReg may not support a SubIdx sub-register, and we may need to
+      // Reg may not support a SubIdx sub-register, and we may need to
       // constrain its register class or issue a COPY to a compatible register
       // class.
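
The ThinLTOBuffer wrapper added above exists because MemoryBufferRef stores only a StringRef to its identifier: if the caller's identifier string is a temporary, the reference dangles. A minimal illustration of the hazard, with a hypothetical makeIdentifier() helper:

    #include "llvm/Support/MemoryBuffer.h"
    #include <string>

    using namespace llvm;

    std::string makeIdentifier();  // hypothetical: computes a module name

    MemoryBufferRef dangling(StringRef Data) {
      std::string Id = makeIdentifier();
      return MemoryBufferRef(Data, Id);  // bug: Id is destroyed on return
    }

    // ThinLTOBuffer instead copies the identifier into an owned std::string
    // and mints a fresh MemoryBufferRef on demand:
    //   ThinLTOBuffer Buf(Data, makeIdentifier());
    //   MemoryBufferRef Ref = Buf.getMemBuffer();  // identifier stays valid

The {c_str(), size()} pair in getMemBuffer() also keeps the identifier null-terminated, which consumers that treat it as a file name appear to rely on.
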
- VReg = ConstrainForSubReg(VReg, SubIdx, - Node->getOperand(0).getSimpleValueType(), - Node->getDebugLoc()); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + Reg = ConstrainForSubReg(Reg, SubIdx, + Node->getOperand(0).getSimpleValueType(), + Node->getDebugLoc()); // Create the destreg if it is missing. if (VRBase == 0) VRBase = MRI->createVirtualRegister(TRC); // Create the extract_subreg machine instruction. - BuildMI(*MBB, InsertPos, Node->getDebugLoc(), - TII->get(TargetOpcode::COPY), VRBase).addReg(VReg, 0, SubIdx); + MachineInstrBuilder CopyMI = + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), + TII->get(TargetOpcode::COPY), VRBase); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + CopyMI.addReg(Reg, 0, SubIdx); + else + CopyMI.addReg(TRI->getSubReg(Reg, SubIdx)); } } else if (Opc == TargetOpcode::INSERT_SUBREG || Opc == TargetOpcode::SUBREG_TO_REG) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 9ca646534e2b..996c95bd5f07 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5832,6 +5832,15 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, const Value *SwiftErrorVal = nullptr; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // We can't tail call inside a function with a swifterror argument. Lowering + // does not support this yet. It would have to move into the swifterror + // register before the call. + auto *Caller = CS.getInstruction()->getParent()->getParent(); + if (TLI.supportSwiftError() && + Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + isTailCall = false; + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { const Value *V = *i; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 004fa703c192..64e6c221229b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2782,14 +2782,15 @@ struct MatchScope { /// for this. class MatchStateUpdater : public SelectionDAG::DAGUpdateListener { - SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes; - SmallVectorImpl<MatchScope> &MatchScopes; + SDNode **NodeToMatch; + SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes; + SmallVectorImpl<MatchScope> &MatchScopes; public: - MatchStateUpdater(SelectionDAG &DAG, - SmallVectorImpl<std::pair<SDValue, SDNode*> > &RN, - SmallVectorImpl<MatchScope> &MS) : - SelectionDAG::DAGUpdateListener(DAG), - RecordedNodes(RN), MatchScopes(MS) { } + MatchStateUpdater(SelectionDAG &DAG, SDNode **NodeToMatch, + SmallVectorImpl<std::pair<SDValue, SDNode *>> &RN, + SmallVectorImpl<MatchScope> &MS) + : SelectionDAG::DAGUpdateListener(DAG), NodeToMatch(NodeToMatch), + RecordedNodes(RN), MatchScopes(MS) {} void NodeDeleted(SDNode *N, SDNode *E) override { // Some early-returns here to avoid the search if we deleted the node or @@ -2799,6 +2800,9 @@ public: // update listener during matching a complex patterns. if (!E || E->isMachineOpcode()) return; + // Check if NodeToMatch was updated. + if (N == *NodeToMatch) + *NodeToMatch = E; // Performing linear search here does not matter because we almost never // run this code. You'd have to have a CSE during complex pattern // matching. @@ -3091,7 +3095,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, // consistent. 
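
The LowerCallTo change above stops tail-call formation whenever the calling function itself carries a swifterror argument, since lowering would otherwise have to reload the swifterror register before the jump. A rough source-level analogue using Clang's Swift calling-convention attributes (the error type and callee are made up for illustration):

    struct SwiftError;
    extern void finish(void);

    // 'err' lives in a dedicated register across calls. A tail call to
    // finish() would bypass the point where that register is
    // re-established, so the call is now emitted as a normal call.
    __attribute__((swiftcall))
    void caller(void *ctx __attribute__((swift_context)),
                SwiftError **err __attribute__((swift_error_result))) {
      finish();
    }
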
std::unique_ptr<MatchStateUpdater> MSU; if (ComplexPatternFuncMutatesDAG()) - MSU.reset(new MatchStateUpdater(*CurDAG, RecordedNodes, + MSU.reset(new MatchStateUpdater(*CurDAG, &NodeToMatch, RecordedNodes, MatchScopes)); if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second, diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp index 104fb199da08..40537e4fa784 100644 --- a/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/lib/LTO/ThinLTOCodeGenerator.cpp @@ -150,13 +150,13 @@ static void computePrevailingCopies( } static StringMap<MemoryBufferRef> -generateModuleMap(const std::vector<MemoryBufferRef> &Modules) { +generateModuleMap(const std::vector<ThinLTOBuffer> &Modules) { StringMap<MemoryBufferRef> ModuleMap; for (auto &ModuleBuffer : Modules) { assert(ModuleMap.find(ModuleBuffer.getBufferIdentifier()) == ModuleMap.end() && "Expect unique Buffer Identifier"); - ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer; + ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer.getMemBuffer(); } return ModuleMap; } @@ -522,13 +522,13 @@ static void initTMBuilder(TargetMachineBuilder &TMBuilder, } // end anonymous namespace void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) { - MemoryBufferRef Buffer(Data, Identifier); + ThinLTOBuffer Buffer(Data, Identifier); if (Modules.empty()) { // First module added, so initialize the triple and some options LLVMContext Context; StringRef TripleStr; - ErrorOr<std::string> TripleOrErr = - expectedToErrorOrAndEmitErrors(Context, getBitcodeTargetTriple(Buffer)); + ErrorOr<std::string> TripleOrErr = expectedToErrorOrAndEmitErrors( + Context, getBitcodeTargetTriple(Buffer.getMemBuffer())); if (TripleOrErr) TripleStr = *TripleOrErr; Triple TheTriple(TripleStr); @@ -538,8 +538,8 @@ void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) { else { LLVMContext Context; StringRef TripleStr; - ErrorOr<std::string> TripleOrErr = - expectedToErrorOrAndEmitErrors(Context, getBitcodeTargetTriple(Buffer)); + ErrorOr<std::string> TripleOrErr = expectedToErrorOrAndEmitErrors( + Context, getBitcodeTargetTriple(Buffer.getMemBuffer())); if (TripleOrErr) TripleStr = *TripleOrErr; assert(TMBuilder.TheTriple.str() == TripleStr && @@ -588,7 +588,8 @@ std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() { uint64_t NextModuleId = 0; for (auto &ModuleBuffer : Modules) { Expected<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr = - object::ModuleSummaryIndexObjectFile::create(ModuleBuffer); + object::ModuleSummaryIndexObjectFile::create( + ModuleBuffer.getMemBuffer()); if (!ObjOrErr) { // FIXME diagnose logAllUnhandledErrors( @@ -852,8 +853,9 @@ void ThinLTOCodeGenerator::run() { Context.setDiscardValueNames(LTODiscardValueNames); // Parse module now - auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false, - /*IsImporting*/ false); + auto TheModule = + loadModuleFromBuffer(ModuleBuffer.getMemBuffer(), Context, false, + /*IsImporting*/ false); // CodeGen auto OutputBuffer = codegen(*TheModule); @@ -943,8 +945,8 @@ void ThinLTOCodeGenerator::run() { std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0); std::sort(ModulesOrdering.begin(), ModulesOrdering.end(), [&](int LeftIndex, int RightIndex) { - auto LSize = Modules[LeftIndex].getBufferSize(); - auto RSize = Modules[RightIndex].getBufferSize(); + auto LSize = Modules[LeftIndex].getBuffer().size(); + auto RSize = Modules[RightIndex].getBuffer().size(); return LSize > RSize; }); @@ -996,8 +998,9 
@@ void ThinLTOCodeGenerator::run() { } // Parse module now - auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false, - /*IsImporting*/ false); + auto TheModule = + loadModuleFromBuffer(ModuleBuffer.getMemBuffer(), Context, false, + /*IsImporting*/ false); // Save temps: original file. saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc"); diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td index 9058617768dd..938779d23690 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.td +++ b/lib/Target/AArch64/AArch64CallingConvention.td @@ -91,7 +91,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[ CCIfType<[v2f32], CCBitConvertToType<v2i32>>, CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>, - CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X19], [W19]>>>, + CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>, // Big endian vectors must be passed as if they were 1-element vectors so that // their lanes are in a consistent order. @@ -138,8 +138,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[ // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>, - // A SwiftError is passed in X19. - CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X19], [W19]>>>, + // A SwiftError is passed in X21. + CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>, CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>, @@ -289,7 +289,7 @@ def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>; def CSR_AArch64_AAPCS_SwiftError - : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X19)>; + : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>; // The function used by Darwin to obtain the address of a thread-local variable // guarantees more than a normal AAPCS function. x16 and x17 are used on the diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 2f67a105b4d1..849058bdfbdb 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3155,7 +3155,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, } if (VA.isRegLoc()) { - if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i64) { + if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() && + Outs[0].VT == MVT::i64) { assert(VA.getLocVT() == MVT::i64 && "unexpected calling convention register assignment"); assert(!Ins.empty() && Ins[0].VT == MVT::i64 && diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 9c278a52a7ff..7a7b7fede7c8 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -26,8 +26,8 @@ def CC_ARM_APCS : CallingConv<[ // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>, - // A SwiftError is passed in R6. - CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>, + // A SwiftError is passed in R8. + CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>, // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, @@ -51,8 +51,8 @@ def RetCC_ARM_APCS : CallingConv<[ // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>, - // A SwiftError is returned in R6. 
- CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>, + // A SwiftError is returned in R8. + CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>, // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, @@ -166,8 +166,8 @@ def CC_ARM_AAPCS : CallingConv<[ // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>, - // A SwiftError is passed in R6. - CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>, + // A SwiftError is passed in R8. + CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>, CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>, CCIfType<[f32], CCBitConvertToType<i32>>, @@ -182,8 +182,8 @@ def RetCC_ARM_AAPCS : CallingConv<[ // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>, - // A SwiftError is returned in R6. - CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>, + // A SwiftError is returned in R8. + CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>, CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>, CCIfType<[f32], CCBitConvertToType<i32>>, @@ -206,8 +206,8 @@ def CC_ARM_AAPCS_VFP : CallingConv<[ // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>, - // A SwiftError is passed in R6. - CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>, + // A SwiftError is passed in R8. + CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>, // HFAs are passed in a contiguous block of registers, or on the stack CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_Aggregate">>, @@ -227,8 +227,8 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[ // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>, - // A SwiftError is returned in R6. - CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>, + // A SwiftError is returned in R8. + CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>, CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, @@ -267,8 +267,8 @@ def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, // Also save R7-R4 first to match the stack frame fixed spill areas. def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; -// R6 is used to pass swifterror, remove it from CSR. -def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R6)>; +// R8 is used to pass swifterror, remove it from CSR. 
+def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R8)>; def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS_ThisReturn, R9))>; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 1606c1576465..97481d49ea34 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1787,7 +1787,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, StackPtr, MemOpChains, Flags); } } else if (VA.isRegLoc()) { - if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) { + if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() && + Outs[0].VT == MVT::i32) { assert(VA.getLocVT() == MVT::i32 && "unexpected calling convention register assignment"); assert(!Ins.empty() && Ins[0].VT == MVT::i32 && diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index ffd518e52968..f5e9e7dd5a93 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1013,7 +1013,9 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) { (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)) && // inalloca allocas are not treated as static, and we don't want // dynamic alloca instrumentation for them as well. - !AI.isUsedWithInAlloca()); + !AI.isUsedWithInAlloca() && + // swifterror allocas are register promoted by ISel + !AI.isSwiftError()); ProcessedAllocas[&AI] = IsInteresting; return IsInteresting; @@ -1088,12 +1090,19 @@ Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I, } } - // Do not instrument acesses from different address spaces; we cannot deal - // with them. if (PtrOperand) { + // Do not instrument acesses from different address spaces; we cannot deal + // with them. Type *PtrTy = cast<PointerType>(PtrOperand->getType()->getScalarType()); if (PtrTy->getPointerAddressSpace() != 0) return nullptr; + + // Ignore swifterror addresses. + // swifterror memory addresses are mem2reg promoted by instruction + // selection. As such they cannot have regular uses like an instrumentation + // function and it makes no sense to track them as memory. + if (PtrOperand->isSwiftError()) + return nullptr; } // Treat memory accesses to promotable allocas as non-interesting since they diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index d9659694da46..52035c79a4a3 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -488,6 +488,13 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I, Value *Addr = IsWrite ? cast<StoreInst>(I)->getPointerOperand() : cast<LoadInst>(I)->getPointerOperand(); + + // swifterror memory addresses are mem2reg promoted by instruction selection. + // As such they cannot have regular uses like an instrumentation function and + // it makes no sense to track them as memory. 
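
The AddressSanitizer and ThreadSanitizer hunks apply one shared rule, which the added comments spell out: swifterror allocas and arguments are promoted to a register by instruction selection, so there is no real memory behind them to poison or to race-check. Distilled into a single predicate (a sketch, not an actual shared helper in the tree):

    #include "llvm/IR/Value.h"

    // True if a sanitizer may instrument an access through Addr.
    static bool sanitizerCanInstrument(const llvm::Value *Addr) {
      // swifterror values never exist as addressable memory at run time,
      // so instrumenting them is both pointless and unlowerable.
      return !Addr->isSwiftError();
    }
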
+ if (Addr->isSwiftError()) + return false; + int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index f9a602bc268a..e346ebd6a000 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -189,11 +189,14 @@ const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB, assert(OriginalBB == OldLoop->getHeader() && "Header should be first in RPO"); + NewLoop = new Loop(); Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop()); - assert(NewLoopParent && - "Expected parent loop before sub-loop in RPO"); - NewLoop = new Loop; - NewLoopParent->addChildLoop(NewLoop); + + if (NewLoopParent) + NewLoopParent->addChildLoop(NewLoop); + else + LI->addTopLevelLoop(NewLoop); + NewLoop->addBasicBlockToLoop(ClonedBB, *LI); return OldLoop; } else { diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 85da3ba899a5..d3ea1564115b 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -302,17 +302,22 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, } NewLoopsMap NewLoops; - NewLoops[L] = NewLoop; + if (NewLoop) + NewLoops[L] = NewLoop; + else if (ParentLoop) + NewLoops[L] = ParentLoop; + // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); NewBlocks.push_back(NewBB); - - if (NewLoop) { + + // If we're unrolling the outermost loop, there's no remainder loop, + // and this block isn't in a nested loop, then the new block is not + // in any loop. Otherwise, add it to loopinfo. + if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop) addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); - } else if (ParentLoop) - ParentLoop->addBasicBlockToLoop(NewBB, *LI); VMap[*BB] = NewBB; if (Header == *BB) { diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll index 81e4b19e6eea..b09fab8d8b46 100644 --- a/test/CodeGen/AArch64/ldst-opt.ll +++ b/test/CodeGen/AArch64/ldst-opt.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 -disable-lsr -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 -disable-lsr -verify-machineinstrs -o - %s | FileCheck --check-prefix=CHECK --check-prefix=NOSTRICTALIGN %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+strict-align -aarch64-enable-atomic-cfg-tidy=0 -disable-lsr -verify-machineinstrs -o - %s | FileCheck --check-prefix=CHECK --check-prefix=STRICTALIGN %s ; This file contains tests for the AArch64 load/store optimizer. @@ -119,7 +120,7 @@ define void @load-pre-indexed-doubleword(%struct.doubleword* %ptr) nounwind { ; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #32]! entry: %a = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1, i32 0 - %add = load i64, i64* %a, align 4 + %add = load i64, i64* %a, align 8 br label %bar bar: %c = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1 @@ -132,7 +133,7 @@ define void @store-pre-indexed-doubleword(%struct.doubleword* %ptr, i64 %val) no ; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #32]! 
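
The LoopUnroll change above removes the old assumption that a cloned loop always has a cloned parent: when unrolling the outermost loop, there is no parent, and the clone must be registered as a new top-level loop. Condensed, assuming the LLVM 4.0 LoopInfo API (later releases construct loops through LoopInfo's allocator):

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/Analysis/LoopInfo.h"

    using namespace llvm;

    static Loop *registerClonedLoop(const Loop *OldLoop, LoopInfo &LI,
                                    DenseMap<const Loop *, Loop *> &NewLoops) {
      Loop *NewLoop = new Loop();
      if (Loop *Parent = NewLoops.lookup(OldLoop->getParentLoop()))
        Parent->addChildLoop(NewLoop);  // nested: attach under cloned parent
      else
        LI.addTopLevelLoop(NewLoop);    // outermost: used to assert here
      NewLoops[OldLoop] = NewLoop;
      return NewLoop;
    }
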
entry: %a = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1, i32 0 - store i64 %val, i64* %a, align 4 + store i64 %val, i64* %a, align 8 br label %bar bar: %c = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1 @@ -147,7 +148,7 @@ define void @load-pre-indexed-quadword(%struct.quadword* %ptr) nounwind { ; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #32]! entry: %a = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1, i32 0 - %add = load fp128, fp128* %a, align 4 + %add = load fp128, fp128* %a, align 16 br label %bar bar: %c = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1 @@ -160,7 +161,7 @@ define void @store-pre-indexed-quadword(%struct.quadword* %ptr, fp128 %val) noun ; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #32]! entry: %a = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1, i32 0 - store fp128 %val, fp128* %a, align 4 + store fp128 %val, fp128* %a, align 16 br label %bar bar: %c = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1 @@ -203,7 +204,7 @@ define void @load-pre-indexed-double(%struct.double* %ptr) nounwind { ; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #32]! entry: %a = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1, i32 0 - %add = load double, double* %a, align 4 + %add = load double, double* %a, align 8 br label %bar bar: %c = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1 @@ -216,7 +217,7 @@ define void @store-pre-indexed-double(%struct.double* %ptr, double %val) nounwin ; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #32]! entry: %a = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1, i32 0 - store double %val, double* %a, align 4 + store double %val, double* %a, align 8 br label %bar bar: %c = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1 @@ -1340,7 +1341,8 @@ end: define void @merge_zr32(i32* %p) { ; CHECK-LABEL: merge_zr32: ; CHECK: // %entry -; CHECK-NEXT: str xzr, [x{{[0-9]+}}] +; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}] +; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}] ; CHECK-NEXT: ret entry: store i32 0, i32* %p @@ -1349,11 +1351,13 @@ entry: ret void } -; Same sa merge_zr32 but the merged stores should also get paried. +; Same as merge_zr32 but the merged stores should also get paried. 
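
The ldst-opt.ll updates run the file a second time with -mattr=+strict-align and split the expectations into NOSTRICTALIGN and STRICTALIGN prefixes. The behavior difference, as a hand-written C++ analogue of merge_zr32 (not taken from the test):

    // Zeroing two adjacent i32 fields can be merged into one 64-bit
    // "str xzr" when underaligned accesses are legal. Under +strict-align
    // a 64-bit store needs 8-byte alignment, and int* only guarantees 4,
    // so the backend emits "stp wzr, wzr" instead.
    void zeroPair(int *p) {
      p[0] = 0;
      p[1] = 0;
    }
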
define void @merge_zr32_2(i32* %p) { ; CHECK-LABEL: merge_zr32_2: ; CHECK: // %entry -; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}] +; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}] +; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}] +; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8] ; CHECK-NEXT: ret entry: store i32 0, i32* %p @@ -1370,7 +1374,11 @@ entry: define void @merge_zr32_2_offset(i32* %p) { ; CHECK-LABEL: merge_zr32_2_offset: ; CHECK: // %entry -; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}, #504] +; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}, #504] +; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #504] +; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #508] +; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #512] +; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #516] ; CHECK-NEXT: ret entry: %p0 = getelementptr i32, i32* %p, i32 126 @@ -1390,8 +1398,12 @@ entry: define void @no_merge_zr32_2_offset(i32* %p) { ; CHECK-LABEL: no_merge_zr32_2_offset: ; CHECK: // %entry -; CHECK-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000 -; CHECK-NEXT: str q[[REG]], [x{{[0-9]+}}, #4096] +; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000 +; NOSTRICTALIGN-NEXT: str q[[REG]], [x{{[0-9]+}}, #4096] +; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4096] +; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4100] +; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4104] +; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #4108] ; CHECK-NEXT: ret entry: %p0 = getelementptr i32, i32* %p, i32 1024 @@ -1411,8 +1423,12 @@ entry: define void @merge_zr32_3(i32* %p) { ; CHECK-LABEL: merge_zr32_3: ; CHECK: // %entry -; CHECK-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000 -; CHECK-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}] +; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000 +; NOSTRICTALIGN-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}] +; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}] +; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8] +; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #16] +; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #24] ; CHECK-NEXT: ret entry: store i32 0, i32* %p @@ -1437,7 +1453,8 @@ entry: define void @merge_zr32_2vec(<2 x i32>* %p) { ; CHECK-LABEL: merge_zr32_2vec: ; CHECK: // %entry -; CHECK-NEXT: str xzr, [x{{[0-9]+}}] +; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}] +; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}] ; CHECK-NEXT: ret entry: store <2 x i32> zeroinitializer, <2 x i32>* %p @@ -1448,8 +1465,10 @@ entry: define void @merge_zr32_3vec(<3 x i32>* %p) { ; CHECK-LABEL: merge_zr32_3vec: ; CHECK: // %entry -; CHECK-NEXT: str xzr, [x{{[0-9]+}}] -; CHECK-NEXT: str wzr, [x{{[0-9]+}}, #8] +; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}] +; NOSTRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #8] +; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}] +; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #8] ; CHECK-NEXT: ret entry: store <3 x i32> zeroinitializer, <3 x i32>* %p @@ -1460,7 +1479,9 @@ entry: define void @merge_zr32_4vec(<4 x i32>* %p) { ; CHECK-LABEL: merge_zr32_4vec: ; CHECK: // %entry -; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}] +; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}] +; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}] +; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8] ; CHECK-NEXT: ret entry: store <4 x i32> zeroinitializer, <4 x i32>* %p @@ -1471,7 +1492,8 @@ entry: define void @merge_zr32_2vecf(<2 x float>* %p) { ; CHECK-LABEL: merge_zr32_2vecf: ; CHECK: // %entry -; CHECK-NEXT: str xzr, [x{{[0-9]+}}] +; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}] +; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}] ; CHECK-NEXT: ret 
entry: store <2 x float> zeroinitializer, <2 x float>* %p @@ -1482,7 +1504,9 @@ entry: define void @merge_zr32_4vecf(<4 x float>* %p) { ; CHECK-LABEL: merge_zr32_4vecf: ; CHECK: // %entry -; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}] +; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}] +; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}] +; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8] ; CHECK-NEXT: ret entry: store <4 x float> zeroinitializer, <4 x float>* %p @@ -1502,13 +1526,42 @@ entry: ret void } +; Similar to merge_zr32, but for 64-bit values and with unaligned stores. +define void @merge_zr64_unalign(<2 x i64>* %p) { +; CHECK-LABEL: merge_zr64_unalign: +; CHECK: // %entry +; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}] +; STRICTALIGN: strb wzr, +; STRICTALIGN: strb +; STRICTALIGN: strb +; STRICTALIGN: strb +; STRICTALIGN: strb +; STRICTALIGN: strb +; STRICTALIGN: strb +; STRICTALIGN: strb +; STRICTALIGN: strb +; STRICTALIGN: strb +; STRICTALIGN: strb +; STRICTALIGN: strb +; STRICTALIGN: strb +; STRICTALIGN: strb +; STRICTALIGN: strb +; STRICTALIGN: strb +; CHECK-NEXT: ret +entry: + store <2 x i64> zeroinitializer, <2 x i64>* %p, align 1 + ret void +} + ; Similar to merge_zr32_3, replaceZeroVectorStore should not split the ; vector store since the zero constant vector has multiple uses. define void @merge_zr64_2(i64* %p) { ; CHECK-LABEL: merge_zr64_2: ; CHECK: // %entry -; CHECK-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000 -; CHECK-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}] +; NOSTRICTALIGN-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000 +; NOSTRICTALIGN-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}] +; STRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}] +; STRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}, #16] ; CHECK-NEXT: ret entry: store i64 0, i64* %p diff --git a/test/CodeGen/AArch64/swifterror.ll b/test/CodeGen/AArch64/swifterror.ll index b15eaa923f08..69bf3510cc5a 100644 --- a/test/CodeGen/AArch64/swifterror.ll +++ b/test/CodeGen/AArch64/swifterror.ll @@ -13,18 +13,18 @@ define float @foo(%swift_error** swifterror %error_ptr_ref) { ; CHECK-APPLE: malloc ; CHECK-APPLE: orr [[ID:w[0-9]+]], wzr, #0x1 ; CHECK-APPLE: strb [[ID]], [x0, #8] -; CHECK-APPLE: mov x19, x0 -; CHECK-APPLE-NOT: x19 +; CHECK-APPLE: mov x21, x0 +; CHECK-APPLE-NOT: x21 ; CHECK-O0-LABEL: foo: ; CHECK-O0: orr w{{.*}}, wzr, #0x10 ; CHECK-O0: malloc -; CHECK-O0: mov x19, x0 -; CHECK-O0-NOT: x19 +; CHECK-O0: mov x21, x0 +; CHECK-O0-NOT: x21 ; CHECK-O0: orr [[ID:w[0-9]+]], wzr, #0x1 -; CHECK-O0-NOT: x19 +; CHECK-O0-NOT: x21 ; CHECK-O0: strb [[ID]], [x0, #8] -; CHECK-O0-NOT: x19 +; CHECK-O0-NOT: x21 entry: %call = call i8* @malloc(i64 16) %call.0 = bitcast i8* %call to %swift_error* @@ -38,20 +38,20 @@ entry: define float @caller(i8* %error_ref) { ; CHECK-APPLE-LABEL: caller: ; CHECK-APPLE: mov [[ID:x[0-9]+]], x0 -; CHECK-APPLE: mov x19, xzr +; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl {{.*}}foo -; CHECK-APPLE: cbnz x19 +; CHECK-APPLE: cbnz x21 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x19, #8] +; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8] ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov x0, x19 +; CHECK-APPLE: mov x0, x21 ; CHECK-APPLE: bl {{.*}}free ; CHECK-O0-LABEL: caller: -; CHECK-O0: mov x19 +; CHECK-O0: mov x21 ; CHECK-O0: bl {{.*}}foo -; CHECK-O0: mov [[ID:x[0-9]+]], x19 -; CHECK-O0: cbnz x19 +; CHECK-O0: mov [[ID:x[0-9]+]], x21 +; CHECK-O0: cbnz x21 entry: %error_ptr_ref = alloca swifterror %swift_error* store %swift_error* null, 
%swift_error** %error_ptr_ref @@ -75,22 +75,22 @@ define float @caller2(i8* %error_ref) { ; CHECK-APPLE-LABEL: caller2: ; CHECK-APPLE: mov [[ID:x[0-9]+]], x0 ; CHECK-APPLE: fmov [[CMP:s[0-9]+]], #1.0 -; CHECK-APPLE: mov x19, xzr +; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl {{.*}}foo -; CHECK-APPLE: cbnz x19 +; CHECK-APPLE: cbnz x21 ; CHECK-APPLE: fcmp s0, [[CMP]] ; CHECK-APPLE: b.le ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x19, #8] +; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8] ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov x0, x19 +; CHECK-APPLE: mov x0, x21 ; CHECK-APPLE: bl {{.*}}free ; CHECK-O0-LABEL: caller2: -; CHECK-O0: mov x19 +; CHECK-O0: mov x21 ; CHECK-O0: bl {{.*}}foo -; CHECK-O0: mov [[ID:x[0-9]+]], x19 -; CHECK-O0: cbnz x19 +; CHECK-O0: mov [[ID:x[0-9]+]], x21 +; CHECK-O0: cbnz x21 entry: %error_ptr_ref = alloca swifterror %swift_error* br label %bb_loop @@ -123,24 +123,24 @@ define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) { ; CHECK-APPLE: malloc ; CHECK-APPLE: orr [[ID:w[0-9]+]], wzr, #0x1 ; CHECK-APPLE: strb [[ID]], [x0, #8] -; CHECK-APPLE: mov x19, x0 -; CHECK-APPLE-NOT: x19 +; CHECK-APPLE: mov x21, x0 +; CHECK-APPLE-NOT: x21 ; CHECK-APPLE: ret ; CHECK-O0-LABEL: foo_if: -; spill x19 -; CHECK-O0: str x19, [sp, [[SLOT:#[0-9]+]]] +; spill x21 +; CHECK-O0: str x21, [sp, [[SLOT:#[0-9]+]]] ; CHECK-O0: cbz w0 ; CHECK-O0: orr w{{.*}}, wzr, #0x10 ; CHECK-O0: malloc ; CHECK-O0: mov [[ID:x[0-9]+]], x0 ; CHECK-O0: orr [[ID2:w[0-9]+]], wzr, #0x1 ; CHECK-O0: strb [[ID2]], [x0, #8] -; CHECK-O0: mov x19, [[ID]] +; CHECK-O0: mov x21, [[ID]] ; CHECK-O0: ret ; reload from stack ; CHECK-O0: ldr [[ID3:x[0-9]+]], [sp, [[SLOT]]] -; CHECK-O0: mov x19, [[ID3]] +; CHECK-O0: mov x21, [[ID3]] ; CHECK-O0: ret entry: %cond = icmp ne i32 %cc, 0 @@ -162,19 +162,19 @@ normal: ; under a certain condition inside a loop. 
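
Every x19 in these swifterror checks becomes x21, matching the calling-convention change earlier in the diff. The source-level pattern the foo/caller tests exercise looks roughly like this, in Clang attribute spelling (makeError() is a hypothetical allocator):

    struct SwiftError;
    SwiftError *makeError();  // hypothetical

    // The error out-parameter travels in a fixed register across the
    // call boundary: x21 on AArch64 after this change (previously x19),
    // r8 on ARM (previously r6).
    __attribute__((swiftcall))
    float mayFail(void *ctx __attribute__((swift_context)),
                  SwiftError **err __attribute__((swift_error_result))) {
      *err = makeError();  // compiles to a move into the swifterror register
      return 1.0f;
    }
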
define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float %cc2) { ; CHECK-APPLE-LABEL: foo_loop: -; CHECK-APPLE: mov x0, x19 +; CHECK-APPLE: mov x0, x21 ; CHECK-APPLE: cbz ; CHECK-APPLE: orr w0, wzr, #0x10 ; CHECK-APPLE: malloc ; CHECK-APPLE: strb w{{.*}}, [x0, #8] ; CHECK-APPLE: fcmp ; CHECK-APPLE: b.le -; CHECK-APPLE: mov x19, x0 +; CHECK-APPLE: mov x21, x0 ; CHECK-APPLE: ret ; CHECK-O0-LABEL: foo_loop: -; spill x19 -; CHECK-O0: str x19, [sp, [[SLOT:#[0-9]+]]] +; spill x21 +; CHECK-O0: str x21, [sp, [[SLOT:#[0-9]+]]] ; CHECK-O0: b [[BB1:[A-Za-z0-9_]*]] ; CHECK-O0: [[BB1]]: ; CHECK-O0: ldr x0, [sp, [[SLOT]]] @@ -193,7 +193,7 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float ; CHECK-O0: b.le [[BB1]] ; reload from stack ; CHECK-O0: ldr [[ID3:x[0-9]+]], [sp] -; CHECK-O0: mov x19, [[ID3]] +; CHECK-O0: mov x21, [[ID3]] ; CHECK-O0: ret entry: br label %bb_loop @@ -229,23 +229,23 @@ define void @foo_sret(%struct.S* sret %agg.result, i32 %val1, %swift_error** swi ; CHECK-APPLE: orr [[ID:w[0-9]+]], wzr, #0x1 ; CHECK-APPLE: strb [[ID]], [x0, #8] ; CHECK-APPLE: str w{{.*}}, [{{.*}}[[SRET]], #4] -; CHECK-APPLE: mov x19, x0 -; CHECK-APPLE-NOT: x19 +; CHECK-APPLE: mov x21, x0 +; CHECK-APPLE-NOT: x21 ; CHECK-O0-LABEL: foo_sret: ; CHECK-O0: orr w{{.*}}, wzr, #0x10 ; spill x8 ; CHECK-O0-DAG: str x8 -; spill x19 -; CHECK-O0-DAG: str x19 +; spill x21 +; CHECK-O0-DAG: str x21 ; CHECK-O0: malloc ; CHECK-O0: orr [[ID:w[0-9]+]], wzr, #0x1 ; CHECK-O0: strb [[ID]], [x0, #8] ; reload from stack ; CHECK-O0: ldr [[SRET:x[0-9]+]] ; CHECK-O0: str w{{.*}}, [{{.*}}[[SRET]], #4] -; CHECK-O0: mov x19 -; CHECK-O0-NOT: x19 +; CHECK-O0: mov x21 +; CHECK-O0-NOT: x21 entry: %call = call i8* @malloc(i64 16) %call.0 = bitcast i8* %call to %swift_error* @@ -261,22 +261,22 @@ entry: define float @caller3(i8* %error_ref) { ; CHECK-APPLE-LABEL: caller3: ; CHECK-APPLE: mov [[ID:x[0-9]+]], x0 -; CHECK-APPLE: mov x19, xzr +; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl {{.*}}foo_sret -; CHECK-APPLE: cbnz x19 +; CHECK-APPLE: cbnz x21 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x19, #8] +; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8] ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov x0, x19 +; CHECK-APPLE: mov x0, x21 ; CHECK-APPLE: bl {{.*}}free ; CHECK-O0-LABEL: caller3: ; spill x0 ; CHECK-O0: str x0 -; CHECK-O0: mov x19 +; CHECK-O0: mov x21 ; CHECK-O0: bl {{.*}}foo_sret -; CHECK-O0: mov [[ID2:x[0-9]+]], x19 -; CHECK-O0: cbnz [[ID2]] +; CHECK-O0: mov [[ID2:x[0-9]+]], x21 +; CHECK-O0: cbnz x21 ; Access part of the error object and save it to error_ref ; reload from stack ; CHECK-O0: ldrb [[CODE:w[0-9]+]] @@ -323,8 +323,8 @@ define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) 
{ ; Third vararg ; CHECK-APPLE: ldr {{w[0-9]+}}, [{{x[0-9]+}}] -; CHECK-APPLE: mov x19, x0 -; CHECK-APPLE-NOT: x19 +; CHECK-APPLE: mov x21, x0 +; CHECK-APPLE-NOT: x21 entry: %call = call i8* @malloc(i64 16) %call.0 = bitcast i8* %call to %swift_error* @@ -356,13 +356,13 @@ define float @caller4(i8* %error_ref) { ; CHECK-APPLE: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8] ; CHECK-APPLE: str {{x[0-9]+}}, [sp] -; CHECK-APPLE: mov x19, xzr +; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl {{.*}}foo_vararg -; CHECK-APPLE: cbnz x19 +; CHECK-APPLE: cbnz x21 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x19, #8] +; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8] ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov x0, x19 +; CHECK-APPLE: mov x0, x21 ; CHECK-APPLE: bl {{.*}}free entry: %error_ptr_ref = alloca swifterror %swift_error* @@ -407,29 +407,29 @@ entry: } ; CHECK-APPLE-LABEL: swifterror_clobber -; CHECK-APPLE: mov [[REG:x[0-9]+]], x19 +; CHECK-APPLE: mov [[REG:x[0-9]+]], x21 ; CHECK-APPLE: nop -; CHECK-APPLE: mov x19, [[REG]] +; CHECK-APPLE: mov x21, [[REG]] define swiftcc void @swifterror_clobber(%swift_error** nocapture swifterror %err) { - call void asm sideeffect "nop", "~{x19}"() + call void asm sideeffect "nop", "~{x21}"() ret void } ; CHECK-APPLE-LABEL: swifterror_reg_clobber -; CHECK-APPLE: stp {{.*}}x19 +; CHECK-APPLE: stp {{.*}}x21 ; CHECK-APPLE: nop -; CHECK-APPLE: ldp {{.*}}x19 +; CHECK-APPLE: ldp {{.*}}x21 define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) { - call void asm sideeffect "nop", "~{x19}"() + call void asm sideeffect "nop", "~{x21}"() ret void } ; CHECK-APPLE-LABEL: params_in_reg ; Save callee saved registers and swifterror since it will be clobbered by the first call to params_in_reg2. -; CHECK-APPLE: stp x19, x28, [sp +; CHECK-APPLE: stp x21, x28, [sp ; CHECK-APPLE: stp x27, x26, [sp ; CHECK-APPLE: stp x25, x24, [sp ; CHECK-APPLE: stp x23, x22, [sp -; CHECK-APPLE: stp x21, x20, [sp +; CHECK-APPLE: stp x20, x19, [sp ; CHECK-APPLE: stp x29, x30, [sp ; CHECK-APPLE: str x20, [sp ; Store argument registers. @@ -439,7 +439,7 @@ define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) { ; CHECK-APPLE: mov x26, x4 ; CHECK-APPLE: mov x27, x3 ; CHECK-APPLE: mov x28, x2 -; CHECK-APPLE: mov x21, x1 +; CHECK-APPLE: mov x19, x1 ; CHECK-APPLE: mov x22, x0 ; Setup call. ; CHECK-APPLE: orr w0, wzr, #0x1 @@ -451,11 +451,11 @@ define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) { ; CHECK-APPLE: orr w6, wzr, #0x7 ; CHECK-APPLE: orr w7, wzr, #0x8 ; CHECK-APPLE: mov x20, xzr -; CHECK-APPLE: mov x19, xzr +; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl _params_in_reg2 ; Restore original arguments for next call. ; CHECK-APPLE: mov x0, x22 -; CHECK-APPLE: mov x1, x21 +; CHECK-APPLE: mov x1, x19 ; CHECK-APPLE: mov x2, x28 ; CHECK-APPLE: mov x3, x27 ; CHECK-APPLE: mov x4, x26 @@ -463,22 +463,22 @@ define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) { ; CHECK-APPLE: mov x6, x24 ; CHECK-APPLE: mov x7, x23 ; Restore original swiftself argument and swifterror %err. -; CHECK-APPLE: ldp x20, x19, [sp +; CHECK-APPLE: ldp x20, x21, [sp ; CHECK-APPLE: bl _params_in_reg2 -; Restore calle save registers but don't clober swifterror x19. -; CHECK-APPLE-NOT: x19 +; Restore calle save registers but don't clober swifterror x21. 
+; CHECK-APPLE-NOT: x21 ; CHECK-APPLE: ldp x29, x30, [sp -; CHECK-APPLE-NOT: x19 -; CHECK-APPLE: ldp x21, x20, [sp -; CHECK-APPLE-NOT: x19 +; CHECK-APPLE-NOT: x21 +; CHECK-APPLE: ldp x20, x19, [sp +; CHECK-APPLE-NOT: x21 ; CHECK-APPLE: ldp x23, x22, [sp -; CHECK-APPLE-NOT: x19 +; CHECK-APPLE-NOT: x21 ; CHECK-APPLE: ldp x25, x24, [sp -; CHECK-APPLE-NOT: x19 +; CHECK-APPLE-NOT: x21 ; CHECK-APPLE: ldp x27, x26, [sp -; CHECK-APPLE-NOT: x19 +; CHECK-APPLE-NOT: x21 ; CHECK-APPLE: ldr x28, [sp -; CHECK-APPLE-NOT: x19 +; CHECK-APPLE-NOT: x21 ; CHECK-APPLE: ret define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err) { %error_ptr_ref = alloca swifterror %swift_error*, align 8 @@ -495,17 +495,17 @@ declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8* ; CHECK-APPLE: stp x27, x26, [sp ; CHECK-APPLE: stp x25, x24, [sp ; CHECK-APPLE: stp x23, x22, [sp -; CHECK-APPLE: stp x21, x20, [sp +; CHECK-APPLE: stp x20, x19, [sp ; CHECK-APPLE: stp x29, x30, [sp ; Save original arguments. -; CHECK-APPLE: mov x23, x19 +; CHECK-APPLE: mov x23, x21 ; CHECK-APPLE: str x7, [sp, #16] ; CHECK-APPLE: mov x24, x6 ; CHECK-APPLE: mov x25, x5 ; CHECK-APPLE: mov x26, x4 ; CHECK-APPLE: mov x27, x3 ; CHECK-APPLE: mov x28, x2 -; CHECK-APPLE: mov x21, x1 +; CHECK-APPLE: mov x19, x1 ; CHECK-APPLE: mov x22, x0 ; Setup call arguments. ; CHECK-APPLE: orr w0, wzr, #0x1 @@ -517,23 +517,23 @@ declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8* ; CHECK-APPLE: orr w6, wzr, #0x7 ; CHECK-APPLE: orr w7, wzr, #0x8 ; CHECK-APPLE: mov x20, xzr -; CHECK-APPLE: mov x19, xzr +; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl _params_in_reg2 ; Store swifterror %error_ptr_ref. -; CHECK-APPLE: str x19, [sp, #8] +; CHECK-APPLE: str x21, [sp, #8] ; Setup call arguments from original arguments. ; CHECK-APPLE: mov x0, x22 -; CHECK-APPLE: mov x1, x21 +; CHECK-APPLE: mov x1, x19 ; CHECK-APPLE: mov x2, x28 ; CHECK-APPLE: mov x3, x27 ; CHECK-APPLE: mov x4, x26 ; CHECK-APPLE: mov x5, x25 ; CHECK-APPLE: mov x6, x24 ; CHECK-APPLE: ldp x7, x20, [sp, #16] -; CHECK-APPLE: mov x19, x23 +; CHECK-APPLE: mov x21, x23 ; CHECK-APPLE: bl _params_and_return_in_reg2 ; Store return values. -; CHECK-APPLE: mov x21, x0 +; CHECK-APPLE: mov x19, x0 ; CHECK-APPLE: mov x22, x1 ; CHECK-APPLE: mov x24, x2 ; CHECK-APPLE: mov x25, x3 @@ -542,7 +542,7 @@ declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8* ; CHECK-APPLE: mov x28, x6 ; CHECK-APPLE: mov x23, x7 ; Save swifterror %err. -; CHECK-APPLE: str x19, [sp, #24] +; CHECK-APPLE: str x21, [sp, #24] ; Setup call. ; CHECK-APPLE: orr w0, wzr, #0x1 ; CHECK-APPLE: orr w1, wzr, #0x2 @@ -554,10 +554,10 @@ declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8* ; CHECK-APPLE: orr w7, wzr, #0x8 ; CHECK-APPLE: mov x20, xzr ; ... setup call with swiferror %error_ptr_ref. -; CHECK-APPLE: ldr x19, [sp, #8] +; CHECK-APPLE: ldr x21, [sp, #8] ; CHECK-APPLE: bl _params_in_reg2 ; Restore return values for return from this function. -; CHECK-APPLE: mov x0, x21 +; CHECK-APPLE: mov x0, x19 ; CHECK-APPLE: mov x1, x22 ; CHECK-APPLE: mov x2, x24 ; CHECK-APPLE: mov x3, x25 @@ -566,9 +566,9 @@ declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8* ; CHECK-APPLE: mov x6, x28 ; CHECK-APPLE: mov x7, x23 ; Restore swifterror %err and callee save registers. 
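
Context for the shuffled stp/ldp pairs in params_in_reg: the swifterror register is deliberately excluded from the callee-saved set of swifterror-using functions, because the callee mutates it as an implicit in/out value. The generated save list is, conceptually, the plain AAPCS list minus that register; a sketch of the set operation the .td change expresses (the real list is TableGen-generated, not built at run time):

    #include <algorithm>
    #include <vector>

    std::vector<unsigned> swiftErrorCSRs(std::vector<unsigned> AAPCSList,
                                         unsigned SwiftErrorReg /* X21 now */) {
      AAPCSList.erase(
          std::remove(AAPCSList.begin(), AAPCSList.end(), SwiftErrorReg),
          AAPCSList.end());
      return AAPCSList;
    }
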
-; CHECK-APPLE: ldp x19, x28, [sp, #24 +; CHECK-APPLE: ldp x21, x28, [sp, #24 ; CHECK-APPLE: ldp x29, x30, [sp -; CHECK-APPLE: ldp x21, x20, [sp +; CHECK-APPLE: ldp x20, x19, [sp ; CHECK-APPLE: ldp x23, x22, [sp ; CHECK-APPLE: ldp x25, x24, [sp ; CHECK-APPLE: ldp x27, x26, [sp @@ -583,3 +583,17 @@ define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_ } declare swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err) + +declare void @acallee(i8*) + +; Make sure we don't tail call if the caller returns a swifterror value. We +; would have to move into the swifterror register before the tail call. +; CHECK-APPLE: tailcall_from_swifterror: +; CHECK-APPLE-NOT: b _acallee +; CHECK-APPLE: bl _acallee + +define swiftcc void @tailcall_from_swifterror(%swift_error** swifterror %error_ptr_ref) { +entry: + tail call void @acallee(i8* null) + ret void +} diff --git a/test/CodeGen/AArch64/swiftself.ll b/test/CodeGen/AArch64/swiftself.ll index a60aed6b0f2b..33a49198430e 100644 --- a/test/CodeGen/AArch64/swiftself.ll +++ b/test/CodeGen/AArch64/swiftself.ll @@ -65,3 +65,21 @@ define i8* @swiftself_notail(i8* swiftself %addr0, i8* %addr1) nounwind { %res = tail call i8* @swiftself_param(i8* swiftself %addr1) ret i8* %res } + +; We cannot pretend that 'x0' is alive across the thisreturn_attribute call as +; we normally would. We marked the first parameter with swiftself which means it +; will no longer be passed in x0. +declare swiftcc i8* @thisreturn_attribute(i8* returned swiftself) +; OPT-LABEL: swiftself_nothisreturn: +; OPT-DAG: ldr x20, [x20] +; OPT-DAG: mov [[CSREG:x[1-9].*]], x8 +; OPT: bl {{_?}}thisreturn_attribute +; OPT: str x0, {{\[}}[[CSREG]] +; OPT: ret +define hidden swiftcc void @swiftself_nothisreturn(i8** noalias nocapture sret, i8** noalias nocapture readonly swiftself) { +entry: + %2 = load i8*, i8** %1, align 8 + %3 = tail call swiftcc i8* @thisreturn_attribute(i8* swiftself %2) + store i8* %3, i8** %0, align 8 + ret void +} diff --git a/test/CodeGen/ARM/swifterror.ll b/test/CodeGen/ARM/swifterror.ll index 7551291207ed..78764202f627 100644 --- a/test/CodeGen/ARM/swifterror.ll +++ b/test/CodeGen/ARM/swifterror.ll @@ -13,7 +13,7 @@ define float @foo(%swift_error** swifterror %error_ptr_ref) { ; CHECK-APPLE: mov r0, #16 ; CHECK-APPLE: malloc ; CHECK-APPLE-DAG: mov [[ID:r[0-9]+]], #1 -; CHECK-APPLE-DAG: mov r6, r{{.*}} +; CHECK-APPLE-DAG: mov r8, r{{.*}} ; CHECK-APPLE-DAG: strb [[ID]], [r{{.*}}, #8] ; CHECK-O0-LABEL: foo: @@ -22,7 +22,7 @@ define float @foo(%swift_error** swifterror %error_ptr_ref) { ; CHECK-O0: mov [[ID2:r[0-9]+]], r0 ; CHECK-O0: mov [[ID:r[0-9]+]], #1 ; CHECK-O0: strb [[ID]], [r0, #8] -; CHECK-O0: mov r6, [[ID2]] +; CHECK-O0: mov r8, [[ID2]] entry: %call = call i8* @malloc(i64 16) %call.0 = bitcast i8* %call to %swift_error* @@ -36,21 +36,21 @@ entry: define float @caller(i8* %error_ref) { ; CHECK-APPLE-LABEL: caller: ; CHECK-APPLE-DAG: mov [[ID:r[0-9]+]], r0 -; CHECK-APPLE-DAG: mov r6, #0 +; CHECK-APPLE-DAG: mov r8, #0 ; CHECK-APPLE: bl {{.*}}foo -; CHECK-APPLE: cmp r6, #0 +; CHECK-APPLE: cmp r8, #0 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r6, #8] +; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r8, #8] ; CHECK-APPLE: strbeq [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov r0, r6 +; CHECK-APPLE: mov r0, r8 ; CHECK-APPLE: bl {{.*}}free ; CHECK-O0-LABEL: caller: ; spill r0 -; 
CHECK-O0-DAG: mov r6, #0 +; CHECK-O0-DAG: mov r8, #0 ; CHECK-O0-DAG: str r0, [sp, [[SLOT:#[0-9]+]] ; CHECK-O0: bl {{.*}}foo -; CHECK-O0: mov [[TMP:r[0-9]+]], r6 +; CHECK-O0: mov [[TMP:r[0-9]+]], r8 ; CHECK-O0: str [[TMP]], [sp] ; CHECK-O0: bne ; CHECK-O0: ldrb [[CODE:r[0-9]+]], [r0, #8] @@ -81,22 +81,22 @@ handler: define float @caller2(i8* %error_ref) { ; CHECK-APPLE-LABEL: caller2: ; CHECK-APPLE-DAG: mov [[ID:r[0-9]+]], r0 -; CHECK-APPLE-DAG: mov r6, #0 +; CHECK-APPLE-DAG: mov r8, #0 ; CHECK-APPLE: bl {{.*}}foo -; CHECK-APPLE: cmp r6, #0 +; CHECK-APPLE: cmp r8, #0 ; CHECK-APPLE: bne ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrb [[CODE:r[0-9]+]], [r6, #8] +; CHECK-APPLE: ldrb [[CODE:r[0-9]+]], [r8, #8] ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov r0, r6 +; CHECK-APPLE: mov r0, r8 ; CHECK-APPLE: bl {{.*}}free ; CHECK-O0-LABEL: caller2: ; spill r0 ; CHECK-O0-DAG: str r0, -; CHECK-O0-DAG: mov r6, #0 +; CHECK-O0-DAG: mov r8, #0 ; CHECK-O0: bl {{.*}}foo -; CHECK-O0: mov r{{.*}}, r6 +; CHECK-O0: mov r{{.*}}, r8 ; CHECK-O0: str r0, [sp] ; CHECK-O0: bne ; CHECK-O0: ble @@ -138,22 +138,22 @@ define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) { ; CHECK-APPLE: mov r0, #16 ; CHECK-APPLE: malloc ; CHECK-APPLE: mov [[ID:r[0-9]+]], #1 -; CHECK-APPLE-DAG: mov r6, r{{.*}} +; CHECK-APPLE-DAG: mov r8, r{{.*}} ; CHECK-APPLE-DAG: strb [[ID]], [r{{.*}}, #8] ; CHECK-O0-LABEL: foo_if: ; CHECK-O0: cmp r0, #0 ; spill to stack -; CHECK-O0: str r6 +; CHECK-O0: str r8 ; CHECK-O0: beq ; CHECK-O0: mov r0, #16 ; CHECK-O0: malloc ; CHECK-O0: mov [[ID:r[0-9]+]], r0 ; CHECK-O0: mov [[ID2:[a-z0-9]+]], #1 ; CHECK-O0: strb [[ID2]], [r0, #8] -; CHECK-O0: mov r6, [[ID]] +; CHECK-O0: mov r8, [[ID]] ; reload from stack -; CHECK-O0: ldr r6 +; CHECK-O0: ldr r8 entry: %cond = icmp ne i32 %cc, 0 br i1 %cond, label %gen_error, label %normal @@ -176,17 +176,17 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float ; CHECK-APPLE-LABEL: foo_loop: ; CHECK-APPLE: mov [[CODE:r[0-9]+]], r0 ; swifterror is kept in a register -; CHECK-APPLE: mov [[ID:r[0-9]+]], r6 +; CHECK-APPLE: mov [[ID:r[0-9]+]], r8 ; CHECK-APPLE: cmp [[CODE]], #0 ; CHECK-APPLE: beq ; CHECK-APPLE: mov r0, #16 ; CHECK-APPLE: malloc ; CHECK-APPLE: strb r{{.*}}, [{{.*}}[[ID]], #8] ; CHECK-APPLE: ble -; CHECK-APPLE: mov r6, [[ID]] +; CHECK-APPLE: mov r8, [[ID]] ; CHECK-O0-LABEL: foo_loop: -; CHECK-O0: mov r{{.*}}, r6 +; CHECK-O0: mov r{{.*}}, r8 ; CHECK-O0: cmp r{{.*}}, #0 ; CHECK-O0: beq ; CHECK-O0-DAG: movw r{{.*}}, #1 @@ -200,7 +200,7 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float ; CHECK-O0: vcmpe ; CHECK-O0: ble ; reload from stack -; CHECK-O0: ldr r6 +; CHECK-O0: ldr r8 entry: br label %bb_loop @@ -231,7 +231,7 @@ define void @foo_sret(%struct.S* sret %agg.result, i32 %val1, %swift_error** swi ; CHECK-APPLE: mov r0, #16 ; CHECK-APPLE: malloc ; CHECK-APPLE: mov [[REG:r[0-9]+]], #1 -; CHECK-APPLE-DAG: mov r6, r0 +; CHECK-APPLE-DAG: mov r8, r0 ; CHECK-APPLE-DAG: strb [[REG]], [r0, #8] ; CHECK-APPLE-DAG: str r{{.*}}, [{{.*}}[[SRET]], #4] @@ -247,7 +247,7 @@ define void @foo_sret(%struct.S* sret %agg.result, i32 %val1, %swift_error** swi ; CHECK-O0: ldr ; CHECK-O0: ldr ; CHECK-O0: str r{{.*}}, [{{.*}}, #4] -; CHECK-O0: mov r6 +; CHECK-O0: mov r8 entry: %call = call i8* @malloc(i64 16) %call.0 = bitcast i8* %call to %swift_error* @@ -263,22 +263,22 @@ entry: define float @caller3(i8* %error_ref) { ; CHECK-APPLE-LABEL: caller3: ; CHECK-APPLE: 
mov [[ID:r[0-9]+]], r0 -; CHECK-APPLE: mov r6, #0 +; CHECK-APPLE: mov r8, #0 ; CHECK-APPLE: bl {{.*}}foo_sret -; CHECK-APPLE: cmp r6, #0 +; CHECK-APPLE: cmp r8, #0 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r6, #8] +; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r8, #8] ; CHECK-APPLE: strbeq [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov r0, r6 +; CHECK-APPLE: mov r0, r8 ; CHECK-APPLE: bl {{.*}}free ; CHECK-O0-LABEL: caller3: -; CHECK-O0-DAG: mov r6, #0 +; CHECK-O0-DAG: mov r8, #0 ; CHECK-O0-DAG: mov r0 ; CHECK-O0-DAG: mov r1 ; CHECK-O0: bl {{.*}}foo_sret -; CHECK-O0: mov [[ID2:r[0-9]+]], r6 -; CHECK-O0: cmp r6 +; CHECK-O0: mov [[ID2:r[0-9]+]], r8 +; CHECK-O0: cmp r8 ; CHECK-O0: str [[ID2]], [sp[[SLOT:.*]]] ; CHECK-O0: bne ; Access part of the error object and save it to error_ref @@ -316,7 +316,7 @@ define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) { ; CHECK-APPLE: mov [[REG:r[0-9]+]], r0 ; CHECK-APPLE: mov [[ID:r[0-9]+]], #1 ; CHECK-APPLE-DAG: strb [[ID]], [{{.*}}[[REG]], #8] -; CHECK-APPLE-DAG: mov r6, [[REG]] +; CHECK-APPLE-DAG: mov r8, [[REG]] entry: %call = call i8* @malloc(i64 16) @@ -345,13 +345,13 @@ entry: define float @caller4(i8* %error_ref) { ; CHECK-APPLE-LABEL: caller4: ; CHECK-APPLE: mov [[ID:r[0-9]+]], r0 -; CHECK-APPLE: mov r6, #0 +; CHECK-APPLE: mov r8, #0 ; CHECK-APPLE: bl {{.*}}foo_vararg -; CHECK-APPLE: cmp r6, #0 +; CHECK-APPLE: cmp r8, #0 ; Access part of the error object and save it to error_ref -; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r6, #8] +; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r8, #8] ; CHECK-APPLE: strbeq [[CODE]], [{{.*}}[[ID]]] -; CHECK-APPLE: mov r0, r6 +; CHECK-APPLE: mov r0, r8 ; CHECK-APPLE: bl {{.*}}free entry: %error_ptr_ref = alloca swifterror %swift_error* @@ -396,51 +396,51 @@ entry: } ; CHECK-APPLE-LABEL: swifterror_clobber -; CHECK-APPLE: mov [[REG:r[0-9]+]], r6 +; CHECK-APPLE: mov [[REG:r[0-9]+]], r8 ; CHECK-APPLE: nop -; CHECK-APPLE: mov r6, [[REG]] +; CHECK-APPLE: mov r8, [[REG]] define swiftcc void @swifterror_clobber(%swift_error** nocapture swifterror %err) { - call void asm sideeffect "nop", "~{r6}"() + call void asm sideeffect "nop", "~{r8}"() ret void } ; CHECK-APPLE-LABEL: swifterror_reg_clobber -; CHECK-APPLE: push {{.*}}r6 +; CHECK-APPLE: push {{.*}}r8 ; CHECK-APPLE: nop -; CHECK-APPLE: pop {{.*}}r6 +; CHECK-APPLE: pop {{.*}}r8 define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) { - call void asm sideeffect "nop", "~{r6}"() + call void asm sideeffect "nop", "~{r8}"() ret void } ; CHECK-ARMV7-LABEL: _params_in_reg ; Store callee saved registers excluding swifterror. -; CHECK-ARMV7: push {r4, r5, r7, r8, r10, r11, lr} -; Store swiftself (r10) and swifterror (r6). -; CHECK-ARMV7-DAG: str r6, [s[[STK1:.*]]] +; CHECK-ARMV7: push {r4, r5, r6, r7, r10, r11, lr} +; Store swiftself (r10) and swifterror (r8). +; CHECK-ARMV7-DAG: str r8, [s[[STK1:.*]]] ; CHECK-ARMV7-DAG: str r10, [s[[STK2:.*]]] ; Store arguments. -; CHECK-ARMV7: mov r4, r3 -; CHECK-ARMV7: mov r5, r2 -; CHECK-ARMV7: mov r8, r1 -; CHECK-ARMV7: mov r11, r0 +; CHECK-ARMV7: mov r6, r3 +; CHECK-ARMV7: mov r4, r2 +; CHECK-ARMV7: mov r11, r1 +; CHECK-ARMV7: mov r5, r0 ; Setup call. ; CHECK-ARMV7: mov r0, #1 ; CHECK-ARMV7: mov r1, #2 ; CHECK-ARMV7: mov r2, #3 ; CHECK-ARMV7: mov r3, #4 ; CHECK-ARMV7: mov r10, #0 -; CHECK-ARMV7: mov r6, #0 +; CHECK-ARMV7: mov r8, #0 ; CHECK-ARMV7: bl _params_in_reg2 ; Restore original arguments. 
 ; CHECK-ARMV7-DAG: ldr r10, [s[[STK2]]]
-; CHECK-ARMV7-DAG: ldr r6, [s[[STK1]]]
-; CHECK-ARMV7: mov r0, r11
-; CHECK-ARMV7: mov r1, r8
-; CHECK-ARMV7: mov r2, r5
-; CHECK-ARMV7: mov r3, r4
+; CHECK-ARMV7-DAG: ldr r8, [s[[STK1]]]
+; CHECK-ARMV7: mov r0, r5
+; CHECK-ARMV7: mov r1, r11
+; CHECK-ARMV7: mov r2, r4
+; CHECK-ARMV7: mov r3, r6
 ; CHECK-ARMV7: bl _params_in_reg2
-; CHECK-ARMV7: pop {r4, r5, r7, r8, r10, r11, pc}
+; CHECK-ARMV7: pop {r4, r5, r6, r7, r10, r11, pc}
 define swiftcc void @params_in_reg(i32, i32, i32, i32, i8* swiftself, %swift_error** nocapture swifterror %err) {
   %error_ptr_ref = alloca swifterror %swift_error*, align 8
   store %swift_error* null, %swift_error** %error_ptr_ref
@@ -451,42 +451,42 @@ define swiftcc void @params_in_reg(i32, i32, i32, i32, i8* swiftself, %swift_err
 declare swiftcc void @params_in_reg2(i32, i32, i32, i32, i8* swiftself, %swift_error** nocapture swifterror %err)
 ; CHECK-ARMV7-LABEL: params_and_return_in_reg
-; CHECK-ARMV7: push {r4, r5, r7, r8, r10, r11, lr}
+; CHECK-ARMV7: push {r4, r5, r6, r7, r10, r11, lr}
 ; Store swifterror and swiftself
-; CHECK-ARMV7: mov r4, r6
+; CHECK-ARMV7: mov r6, r8
 ; CHECK-ARMV7: str r10, [s[[STK1:.*]]]
 ; Store arguments.
 ; CHECK-ARMV7: str r3, [s[[STK2:.*]]]
-; CHECK-ARMV7: mov r5, r2
-; CHECK-ARMV7: mov r8, r1
-; CHECK-ARMV7: mov r11, r0
+; CHECK-ARMV7: mov r4, r2
+; CHECK-ARMV7: mov r11, r1
+; CHECK-ARMV7: mov r5, r0
 ; Setup call.
 ; CHECK-ARMV7: mov r0, #1
 ; CHECK-ARMV7: mov r1, #2
 ; CHECK-ARMV7: mov r2, #3
 ; CHECK-ARMV7: mov r3, #4
 ; CHECK-ARMV7: mov r10, #0
-; CHECK-ARMV7: mov r6, #0
+; CHECK-ARMV7: mov r8, #0
 ; CHECK-ARMV7: bl _params_in_reg2
 ; Restore original arguments.
 ; CHECK-ARMV7: ldr r3, [s[[STK2]]]
 ; CHECK-ARMV7: ldr r10, [s[[STK1]]]
 ; Store %error_ptr_ref;
-; CHECK-ARMV7: str r6, [s[[STK3:.*]]]
+; CHECK-ARMV7: str r8, [s[[STK3:.*]]]
 ; Restore original arguments.
-; CHECK-ARMV7: mov r0, r11
-; CHECK-ARMV7: mov r1, r8
-; CHECK-ARMV7: mov r2, r5
-; CHECK-ARMV7: mov r6, r4
+; CHECK-ARMV7: mov r0, r5
+; CHECK-ARMV7: mov r1, r11
+; CHECK-ARMV7: mov r2, r4
+; CHECK-ARMV7: mov r8, r6
 ; CHECK-ARMV7: bl _params_and_return_in_reg2
 ; Store swifterror return %err;
-; CHECK-ARMV7: str r6, [s[[STK1]]]
+; CHECK-ARMV7: str r8, [s[[STK1]]]
 ; Load swifterror value %error_ptr_ref.
-; CHECK-ARMV7: ldr r6, [s[[STK3]]]
+; CHECK-ARMV7: ldr r8, [s[[STK3]]]
 ; Save return values.
-; CHECK-ARMV7: mov r5, r0
-; CHECK-ARMV7: mov r4, r1
-; CHECK-ARMV7: mov r8, r2
+; CHECK-ARMV7: mov r4, r0
+; CHECK-ARMV7: mov r5, r1
+; CHECK-ARMV7: mov r6, r2
 ; CHECK-ARMV7: mov r11, r3
 ; Setup call.
 ; CHECK-ARMV7: mov r0, #1
@@ -496,13 +496,13 @@ declare swiftcc void @params_in_reg2(i32, i32, i32, i32, i8* swiftself, %swift_e
 ; CHECK-ARMV7: mov r10, #0
 ; CHECK-ARMV7: bl _params_in_reg2
 ; Load swifterror %err;
-; CHECK-ARMV7: ldr r6, [s[[STK1]]]
+; CHECK-ARMV7: ldr r8, [s[[STK1]]]
 ; Restore return values for returning.
-; CHECK-ARMV7: mov r0, r5
-; CHECK-ARMV7: mov r1, r4
-; CHECK-ARMV7: mov r2, r8
+; CHECK-ARMV7: mov r0, r4
+; CHECK-ARMV7: mov r1, r5
+; CHECK-ARMV7: mov r2, r6
 ; CHECK-ARMV7: mov r3, r11
-; CHECK-ARMV7: pop {r4, r5, r7, r8, r10, r11, pc}
+; CHECK-ARMV7: pop {r4, r5, r6, r7, r10, r11, pc}
 define swiftcc { i32, i32, i32, i32} @params_and_return_in_reg(i32, i32, i32, i32, i8* swiftself, %swift_error** nocapture swifterror %err) {
   %error_ptr_ref = alloca swifterror %swift_error*, align 8
   store %swift_error* null, %swift_error** %error_ptr_ref
@@ -513,3 +513,18 @@ define swiftcc { i32, i32, i32, i32} @params_and_return_in_reg(i32, i32, i32, i3
 }
 declare swiftcc { i32, i32, i32, i32 } @params_and_return_in_reg2(i32, i32, i32, i32, i8* swiftself, %swift_error** nocapture swifterror %err)
+
+
+declare void @acallee(i8*)
+
+; Make sure we don't tail call if the caller returns a swifterror value. We
+; would have to move into the swifterror register before the tail call.
+; CHECK-APPLE: tailcall_from_swifterror:
+; CHECK-APPLE-NOT: b _acallee
+; CHECK-APPLE: bl _acallee
+
+define swiftcc void @tailcall_from_swifterror(%swift_error** swifterror %error_ptr_ref) {
+entry:
+  tail call void @acallee(i8* null)
+  ret void
+}
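The register churn in the hunks above follows from one change: on ARM the value carried by a swifterror parameter is now pinned to r8 instead of r6, so every CHECK line that named r6 names r8, and r6 returns to the ordinary callee-saved pool (hence the changed push/pop lists). A minimal sketch of the IR shape these tests exercise, reusing the %swift_error layout from the test file (this sketch is not part of the patch):

%swift_error = type { i64, i8 }

declare i8* @malloc(i64)

; Allocate an error object and hand it back through the swifterror
; parameter; under swiftcc on ARM the pointer now travels in r8 across
; the call boundary rather than in r6.
define swiftcc float @make_error(%swift_error** swifterror %err) {
entry:
  %call = call i8* @malloc(i64 16)
  %e = bitcast i8* %call to %swift_error*
  store %swift_error* %e, %swift_error** %err
  ret float 1.0
}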
diff --git a/test/CodeGen/ARM/swiftself.ll b/test/CodeGen/ARM/swiftself.ll
index b7a04ca4060e..1e06b34c7052 100644
--- a/test/CodeGen/ARM/swiftself.ll
+++ b/test/CodeGen/ARM/swiftself.ll
@@ -63,3 +63,20 @@ define i8* @swiftself_notail(i8* swiftself %addr0, i8* %addr1) nounwind "no-fram
   %res = tail call i8* @swiftself_param(i8* swiftself %addr1)
   ret i8* %res
 }
+
+; We cannot pretend that 'r0' is alive across the thisreturn_attribute call as
+; we normally would. We marked the first parameter with swiftself which means it
+; will no longer be passed in r0.
+declare swiftcc i8* @thisreturn_attribute(i8* returned swiftself)
+; OPT-LABEL: swiftself_nothisreturn:
+; OPT-DAG: mov [[CSREG:r[1-9].*]], r0
+; OPT-DAG: ldr r10, [r10]
+; OPT: bl {{_?}}thisreturn_attribute
+; OPT: str r0, {{\[}}[[CSREG]]
+define hidden swiftcc void @swiftself_nothisreturn(i8** noalias nocapture sret, i8** noalias nocapture readonly swiftself) {
+entry:
+  %2 = load i8*, i8** %1, align 8
+  %3 = tail call swiftcc i8* @thisreturn_attribute(i8* swiftself %2)
+  store i8* %3, i8** %0, align 8
+  ret void
+}
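The new swiftself test pins down an interaction with the returned attribute: because a swiftself argument is passed in r10 rather than r0, the caller may not assume r0 still holds the argument after the call and must copy the result out of r0 explicitly. A hypothetical reduction of the same shape (the @identity callee is invented for illustration):

; Sketch only: @identity mirrors the signature shape of
; @thisreturn_attribute in the test above.
declare swiftcc i8* @identity(i8* returned swiftself)

define swiftcc i8* @user(i8* swiftself %ctx) {
entry:
  ; %ctx arrives in r10; the result still comes back in r0.
  %res = call swiftcc i8* @identity(i8* swiftself %ctx)
  ret i8* %res
}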
diff --git a/test/CodeGen/X86/dag-update-nodetomatch.ll b/test/CodeGen/X86/dag-update-nodetomatch.ll
new file mode 100644
index 000000000000..45b6d020ce45
--- /dev/null
+++ b/test/CodeGen/X86/dag-update-nodetomatch.ll
@@ -0,0 +1,241 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+%struct.i = type { i32, i24 }
+%struct.m = type { %struct.i }
+
+@a = local_unnamed_addr global i32 0, align 4
+@b = local_unnamed_addr global i16 0, align 2
+@c = local_unnamed_addr global i16 0, align 2
+@e = local_unnamed_addr global i16 0, align 2
+@l = local_unnamed_addr global %struct.i zeroinitializer, align 4
+@k = local_unnamed_addr global %struct.m zeroinitializer, align 4
+
+@x0 = local_unnamed_addr global double 0.000000e+00, align 8
+@x1 = local_unnamed_addr global i32 0, align 4
+@x2 = local_unnamed_addr global i32 0, align 4
+@x3 = local_unnamed_addr global i32 0, align 4
+@x4 = local_unnamed_addr global i32 0, align 4
+@x5 = local_unnamed_addr global double* null, align 8
+
+; Check that compiler does not crash.
+; Test for PR30775
+define void @_Z1nv() local_unnamed_addr {
+; CHECK-LABEL: _Z1nv:
+entry:
+  %bf.load = load i32, i32* bitcast (i24* getelementptr inbounds (%struct.m, %struct.m* @k, i64 0, i32 0, i32 1) to i32*), align 4
+  %0 = load i16, i16* @c, align 2
+  %conv = sext i16 %0 to i32
+  %1 = load i16, i16* @b, align 2
+  %conv1 = sext i16 %1 to i32
+  %2 = load i32, i32* @a, align 4
+  %tobool = icmp ne i32 %2, 0
+  %bf.load3 = load i32, i32* getelementptr inbounds (%struct.i, %struct.i* @l, i64 0, i32 0), align 4
+  %bf.shl = shl i32 %bf.load3, 7
+  %bf.ashr = ashr exact i32 %bf.shl, 7
+  %bf.clear = shl i32 %bf.load, 1
+  %factor = and i32 %bf.clear, 131070
+  %add13 = add nsw i32 %factor, %conv
+  %add15 = add nsw i32 %add13, %conv1
+  %bf.ashr.op = sub nsw i32 0, %bf.ashr
+  %add28 = select i1 %tobool, i32 %bf.ashr.op, i32 0
+  %tobool29 = icmp eq i32 %add15, %add28
+  %phitmp = icmp eq i32 %bf.ashr, 0
+  %.phitmp = or i1 %phitmp, %tobool29
+  %conv37 = zext i1 %.phitmp to i16
+  store i16 %conv37, i16* @e, align 2
+  %bf.clear39 = and i32 %bf.load, 65535
+  %factor53 = shl nuw nsw i32 %bf.clear39, 1
+  %add46 = add nsw i32 %factor53, %conv
+  %add48 = add nsw i32 %add46, %conv1
+  %add48.lobit = lshr i32 %add48, 31
+  %add48.lobit.not = xor i32 %add48.lobit, 1
+  %add51 = add nuw nsw i32 %add48.lobit.not, %bf.clear39
+  %shr = ashr i32 %2, %add51
+  %conv52 = trunc i32 %shr to i16
+  store i16 %conv52, i16* @b, align 2
+  ret void
+}
+
+; Test for PR31536
+define void @_Z2x6v() local_unnamed_addr {
+; CHECK-LABEL: _Z2x6v:
+entry:
+  %0 = load i32, i32* @x1, align 4
+  %and = and i32 %0, 511
+  %add = add nuw nsw i32 %and, 1
+  store i32 %add, i32* @x4, align 4
+  %.pr = load i32, i32* @x3, align 4
+  %tobool8 = icmp eq i32 %.pr, 0
+  br i1 %tobool8, label %for.end5, label %for.cond1thread-pre-split.lr.ph
+
+for.cond1thread-pre-split.lr.ph: ; preds = %entry
+  %idx.ext13 = zext i32 %add to i64
+  %x5.promoted = load double*, double** @x5, align 8
+  %x5.promoted9 = bitcast double* %x5.promoted to i8*
+  %1 = xor i32 %.pr, -1
+  %2 = zext i32 %1 to i64
+  %3 = shl nuw nsw i64 %2, 3
+  %4 = add nuw nsw i64 %3, 8
+  %5 = mul nuw nsw i64 %4, %idx.ext13
+  %uglygep = getelementptr i8, i8* %x5.promoted9, i64 %5
+  %.pr6.pre = load i32, i32* @x2, align 4
+  %6 = shl nuw nsw i32 %and, 3
+  %addconv = add nuw nsw i32 %6, 8
+  %7 = zext i32 %addconv to i64
+  %scevgep15 = getelementptr double, double* %x5.promoted, i64 1
+  %scevgep1516 = bitcast double* %scevgep15 to i8*
+  br label %for.cond1thread-pre-split
+
+for.cond1thread-pre-split: ; preds = %for.cond1thread-pre-split.lr.ph, %for.inc3
+  %indvar = phi i64 [ 0, %for.cond1thread-pre-split.lr.ph ], [ %indvar.next, %for.inc3 ]
+  %.pr6 = phi i32 [ %.pr6.pre, %for.cond1thread-pre-split.lr.ph ], [ %.pr611, %for.inc3 ]
+  %8 = phi double* [ %x5.promoted, %for.cond1thread-pre-split.lr.ph ], [ %add.ptr, %for.inc3 ]
+  %9 = phi i32 [ %.pr, %for.cond1thread-pre-split.lr.ph ], [ %inc4, %for.inc3 ]
+  %10 = mul i64 %7, %indvar
+  %uglygep14 = getelementptr i8, i8* %x5.promoted9, i64 %10
+  %uglygep17 = getelementptr i8, i8* %scevgep1516, i64 %10
+  %cmp7 = icmp slt i32 %.pr6, 0
+  br i1 %cmp7, label %for.body2.preheader, label %for.inc3
+
+for.body2.preheader: ; preds = %for.cond1thread-pre-split
+  %11 = sext i32 %.pr6 to i64
+  %12 = sext i32 %.pr6 to i64
+  %13 = icmp sgt i64 %12, -1
+  %smax = select i1 %13, i64 %12, i64 -1
+  %14 = add nsw i64 %smax, 1
+  %15 = sub nsw i64 %14, %12
+  %min.iters.check = icmp ult i64 %15, 4
+  br i1 %min.iters.check, label %for.body2.preheader21, label %min.iters.checked
+
+min.iters.checked: ; preds = %for.body2.preheader
+  %n.vec = and i64 %15, -4
+  %cmp.zero = icmp eq i64 %n.vec, 0
+  br i1 %cmp.zero, label %for.body2.preheader21, label %vector.memcheck
+
+vector.memcheck: ; preds = %min.iters.checked
+  %16 = shl nsw i64 %11, 3
+  %scevgep = getelementptr i8, i8* %uglygep14, i64 %16
+  %17 = icmp sgt i64 %11, -1
+  %smax18 = select i1 %17, i64 %11, i64 -1
+  %18 = shl nsw i64 %smax18, 3
+  %scevgep19 = getelementptr i8, i8* %uglygep17, i64 %18
+  %bound0 = icmp ult i8* %scevgep, bitcast (double* @x0 to i8*)
+  %bound1 = icmp ugt i8* %scevgep19, bitcast (double* @x0 to i8*)
+  %memcheck.conflict = and i1 %bound0, %bound1
+  %ind.end = add nsw i64 %11, %n.vec
+  br i1 %memcheck.conflict, label %for.body2.preheader21, label %vector.body.preheader
+
+vector.body.preheader: ; preds = %vector.memcheck
+  %19 = add nsw i64 %n.vec, -4
+  %20 = lshr exact i64 %19, 2
+  %21 = and i64 %20, 1
+  %lcmp.mod = icmp eq i64 %21, 0
+  br i1 %lcmp.mod, label %vector.body.prol.preheader, label %vector.body.prol.loopexit.unr-lcssa
+
+vector.body.prol.preheader: ; preds = %vector.body.preheader
+  br label %vector.body.prol
+
+vector.body.prol: ; preds = %vector.body.prol.preheader
+  %22 = load i64, i64* bitcast (double* @x0 to i64*), align 8
+  %23 = insertelement <2 x i64> undef, i64 %22, i32 0
+  %24 = shufflevector <2 x i64> %23, <2 x i64> undef, <2 x i32> zeroinitializer
+  %25 = insertelement <2 x i64> undef, i64 %22, i32 0
+  %26 = shufflevector <2 x i64> %25, <2 x i64> undef, <2 x i32> zeroinitializer
+  %27 = getelementptr inbounds double, double* %8, i64 %11
+  %28 = bitcast double* %27 to <2 x i64>*
+  store <2 x i64> %24, <2 x i64>* %28, align 8
+  %29 = getelementptr double, double* %27, i64 2
+  %30 = bitcast double* %29 to <2 x i64>*
+  store <2 x i64> %26, <2 x i64>* %30, align 8
+  br label %vector.body.prol.loopexit.unr-lcssa
+
+vector.body.prol.loopexit.unr-lcssa: ; preds = %vector.body.preheader, %vector.body.prol
+  %index.unr.ph = phi i64 [ 4, %vector.body.prol ], [ 0, %vector.body.preheader ]
+  br label %vector.body.prol.loopexit
+
+vector.body.prol.loopexit: ; preds = %vector.body.prol.loopexit.unr-lcssa
+  %31 = icmp eq i64 %20, 0
+  br i1 %31, label %middle.block, label %vector.body.preheader.new
+
+vector.body.preheader.new: ; preds = %vector.body.prol.loopexit
+  %32 = load i64, i64* bitcast (double* @x0 to i64*), align 8
+  %33 = insertelement <2 x i64> undef, i64 %32, i32 0
+  %34 = shufflevector <2 x i64> %33, <2 x i64> undef, <2 x i32> zeroinitializer
+  %35 = insertelement <2 x i64> undef, i64 %32, i32 0
+  %36 = shufflevector <2 x i64> %35, <2 x i64> undef, <2 x i32> zeroinitializer
+  %37 = load i64, i64* bitcast (double* @x0 to i64*), align 8
+  %38 = insertelement <2 x i64> undef, i64 %37, i32 0
+  %39 = shufflevector <2 x i64> %38, <2 x i64> undef, <2 x i32> zeroinitializer
+  %40 = insertelement <2 x i64> undef, i64 %37, i32 0
+  %41 = shufflevector <2 x i64> %40, <2 x i64> undef, <2 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.body.preheader.new
+  %index = phi i64 [ %index.unr.ph, %vector.body.preheader.new ], [ %index.next.1, %vector.body ]
+  %42 = add i64 %11, %index
+  %43 = getelementptr inbounds double, double* %8, i64 %42
+  %44 = bitcast double* %43 to <2 x i64>*
+  store <2 x i64> %34, <2 x i64>* %44, align 8
+  %45 = getelementptr double, double* %43, i64 2
+  %46 = bitcast double* %45 to <2 x i64>*
+  store <2 x i64> %36, <2 x i64>* %46, align 8
+  %index.next = add i64 %index, 4
+  %47 = add i64 %11, %index.next
+  %48 = getelementptr inbounds double, double* %8, i64 %47
+  %49 = bitcast double* %48 to <2 x i64>*
+  store <2 x i64> %39, <2 x i64>* %49, align 8
+  %50 = getelementptr double, double* %48, i64 2
+  %51 = bitcast double* %50 to <2 x i64>*
+  store <2 x i64> %41, <2 x i64>* %51, align 8
+  %index.next.1 = add i64 %index, 8
+  %52 = icmp eq i64 %index.next.1, %n.vec
+  br i1 %52, label %middle.block.unr-lcssa, label %vector.body
+
+middle.block.unr-lcssa: ; preds = %vector.body
+  br label %middle.block
+
+middle.block: ; preds = %vector.body.prol.loopexit, %middle.block.unr-lcssa
+  %cmp.n = icmp eq i64 %15, %n.vec
+  br i1 %cmp.n, label %for.cond1.for.inc3_crit_edge, label %for.body2.preheader21
+
+for.body2.preheader21: ; preds = %middle.block, %vector.memcheck, %min.iters.checked, %for.body2.preheader
+  %indvars.iv.ph = phi i64 [ %11, %vector.memcheck ], [ %11, %min.iters.checked ], [ %11, %for.body2.preheader ], [ %ind.end, %middle.block ]
+  br label %for.body2
+
+for.body2: ; preds = %for.body2.preheader21, %for.body2
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body2 ], [ %indvars.iv.ph, %for.body2.preheader21 ]
+  %53 = load i64, i64* bitcast (double* @x0 to i64*), align 8
+  %arrayidx = getelementptr inbounds double, double* %8, i64 %indvars.iv
+  %54 = bitcast double* %arrayidx to i64*
+  store i64 %53, i64* %54, align 8
+  %indvars.iv.next = add nsw i64 %indvars.iv, 1
+  %cmp = icmp slt i64 %indvars.iv, -1
+  br i1 %cmp, label %for.body2, label %for.cond1.for.inc3_crit_edge.loopexit
+
+for.cond1.for.inc3_crit_edge.loopexit: ; preds = %for.body2
+  br label %for.cond1.for.inc3_crit_edge
+
+for.cond1.for.inc3_crit_edge: ; preds = %for.cond1.for.inc3_crit_edge.loopexit, %middle.block
+  %indvars.iv.next.lcssa = phi i64 [ %ind.end, %middle.block ], [ %indvars.iv.next, %for.cond1.for.inc3_crit_edge.loopexit ]
+  %55 = trunc i64 %indvars.iv.next.lcssa to i32
+  store i32 %55, i32* @x2, align 4
+  br label %for.inc3
+
+for.inc3: ; preds = %for.cond1.for.inc3_crit_edge, %for.cond1thread-pre-split
+  %.pr611 = phi i32 [ %55, %for.cond1.for.inc3_crit_edge ], [ %.pr6, %for.cond1thread-pre-split ]
+  %inc4 = add nsw i32 %9, 1
+  %add.ptr = getelementptr inbounds double, double* %8, i64 %idx.ext13
+  %tobool = icmp eq i32 %inc4, 0
+  %indvar.next = add i64 %indvar, 1
+  br i1 %tobool, label %for.cond.for.end5_crit_edge, label %for.cond1thread-pre-split
+
+for.cond.for.end5_crit_edge: ; preds = %for.inc3
+  store i8* %uglygep, i8** bitcast (double** @x5 to i8**), align 8
+  store i32 0, i32* @x3, align 4
+  br label %for.end5
+
+for.end5: ; preds = %for.cond.for.end5_crit_edge, %entry
+  ret void
+}
+
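Both functions in the new file above are reduced reproducers for PR30775 and PR31536: the assertions were autogenerated and only the function labels are checked, so the tests pass as long as llc gets through instruction selection without crashing. The idiom in its minimal form, with a hypothetical @reproducer, would look roughly like this:

; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
; Only the label is asserted, so any successful codegen passes.
define void @reproducer() {
; CHECK-LABEL: reproducer:
entry:
  ret void
}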
diff --git a/test/CodeGen/X86/pr31956.ll b/test/CodeGen/X86/pr31956.ll
new file mode 100644
index 000000000000..e9293048f4e5
--- /dev/null
+++ b/test/CodeGen/X86/pr31956.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+avx < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-scei-ps4"
+
+@G1 = common global <2 x float> zeroinitializer, align 8
+@G2 = common global <8 x float> zeroinitializer, align 32
+
+define <4 x float> @foo() {
+; CHECK-LABEL: foo:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2,3]
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[2,0]
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+  %V = load <2 x float>, <2 x float>* @G1, align 8
+  %shuffle = shufflevector <2 x float> %V, <2 x float> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef>
+  %L = load <8 x float>, <8 x float>* @G2, align 32
+  %shuffle1 = shufflevector <8 x float> %shuffle, <8 x float> %L, <4 x i32> <i32 12, i32 10, i32 14, i32 4>
+  ret <4 x float> %shuffle1
+}
diff --git a/test/CodeGen/X86/swifterror.ll b/test/CodeGen/X86/swifterror.ll
index cd4150597225..86e0221c2015 100644
--- a/test/CodeGen/X86/swifterror.ll
+++ b/test/CodeGen/X86/swifterror.ll
@@ -670,3 +670,18 @@ define swiftcc { i64, i64, i64, i64} @params_and_return_in_reg(i64, i64, i64, i6
 }
 declare swiftcc { i64, i64, i64, i64 } @params_and_return_in_reg2(i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err)
+
+
+declare void @acallee(i8*)
+
+; Make sure we don't tail call if the caller returns a swifterror value. We
+; would have to move into the swifterror register before the tail call.
+; CHECK-APPLE: tailcall_from_swifterror:
+; CHECK-APPLE-NOT: jmp _acallee
+; CHECK-APPLE: callq _acallee
+
+define swiftcc void @tailcall_from_swifterror(%swift_error** swifterror %error_ptr_ref) {
+entry:
+  tail call void @acallee(i8* null)
+  ret void
+}
diff --git a/test/Instrumentation/AddressSanitizer/basic.ll b/test/Instrumentation/AddressSanitizer/basic.ll
index 9c4d416a1eff..9827e7a6792b 100644
--- a/test/Instrumentation/AddressSanitizer/basic.ll
+++ b/test/Instrumentation/AddressSanitizer/basic.ll
@@ -170,6 +170,32 @@ define void @memintr_test(i8* %a, i8* %b) nounwind uwtable sanitize_address {
 ; CHECK: __asan_memcpy
 ; CHECK: ret void
+
+; CHECK-LABEL: @test_swifterror
+; CHECK-NOT: __asan_report_load
+; CHECK: ret void
+define void @test_swifterror(i8** swifterror) sanitize_address {
+  %swifterror_ptr_value = load i8*, i8** %0
+  ret void
+}
+
+; CHECK-LABEL: @test_swifterror_2
+; CHECK-NOT: __asan_report_store
+; CHECK: ret void
+define void @test_swifterror_2(i8** swifterror) sanitize_address {
+  store i8* null, i8** %0
+  ret void
+}
+
+; CHECK-LABEL: @test_swifterror_3
+; CHECK-NOT: __asan_report_store
+; CHECK: ret void
+define void @test_swifterror_3() sanitize_address {
+  %swifterror_addr = alloca swifterror i8*
+  store i8* null, i8** %swifterror_addr
+  call void @test_swifterror_2(i8** swifterror %swifterror_addr)
+  ret void
+}
+
 ; CHECK: define internal void @asan.module_ctor()
 ; CHECK: call void @__asan_init()
diff --git a/test/Instrumentation/ThreadSanitizer/tsan_basic.ll b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
index 7e049c548f22..61ab98dc9997 100644
--- a/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
+++ b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
@@ -54,5 +54,29 @@ entry:
 ; CHECK: ret void
 }
+
+; CHECK-LABEL: @SwiftError
+; CHECK-NOT: __tsan_read
+; CHECK-NOT: __tsan_write
+; CHECK: ret
+define void @SwiftError(i8** swifterror) sanitize_thread {
+  %swifterror_ptr_value = load i8*, i8** %0
+  store i8* null, i8** %0
+  %swifterror_addr = alloca swifterror i8*
+  %swifterror_ptr_value_2 = load i8*, i8** %swifterror_addr
+  store i8* null, i8** %swifterror_addr
+  ret void
+}
+
+; CHECK-LABEL: @SwiftErrorCall
+; CHECK-NOT: __tsan_read
+; CHECK-NOT: __tsan_write
+; CHECK: ret
+define void @SwiftErrorCall(i8** swifterror) sanitize_thread {
+  %swifterror_addr = alloca swifterror i8*
+  store i8* null, i8** %0
+  call void @SwiftError(i8** %0)
+  ret void
+}
+
 ; CHECK: define internal void @tsan.module_ctor()
 ; CHECK: call void @__tsan_init()
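The X86 swifterror change mirrors the ARM one above, and the two sanitizer tests enforce a shared rule: ASan and TSan must not instrument loads and stores through swifterror pointers, because a swifterror slot is not ordinary addressable memory and may be lowered to a plain register, making a shadow-memory check of its "address" meaningless. A minimal sketch of the pattern being pinned down (function name invented; not from the patch):

; Neither access below should be rewritten into an __asan_report_* or
; __tsan_read/__tsan_write call by the instrumentation passes.
define void @swifterror_untouched(i8** swifterror %err) sanitize_address {
  %v = load i8*, i8** %err
  store i8* null, i8** %err
  ret void
}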
diff --git a/test/Transforms/LoopUnroll/runtime-li.ll b/test/Transforms/LoopUnroll/runtime-li.ll
new file mode 100644
index 000000000000..5494c8e9da7d
--- /dev/null
+++ b/test/Transforms/LoopUnroll/runtime-li.ll
@@ -0,0 +1,36 @@
+; RUN: opt -S -loop-unroll -unroll-runtime -unroll-count=2 -verify-loop-info -pass-remarks=loop-unroll < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Verify that runtime-unrolling a top-level loop that has nested loops does not
+; make the unroller produce invalid loop-info.
+; CHECK: remark: {{.*}}: unrolled loop by a factor of 2 with run-time trip count
+; CHECK: @widget
+; CHECK: ret void
+define void @widget(double* %arg, double* %arg1, double* %p, i64* %q1, i64* %q2) local_unnamed_addr {
+entry:
+  br label %header.outer
+
+header.outer: ; preds = %latch.outer, %entry
+  %tmp = phi double* [ %tmp8, %latch.outer ], [ %arg, %entry ]
+  br label %header.inner
+
+header.inner: ; preds = %latch.inner, %header.outer
+  br i1 undef, label %latch.inner, label %latch.outer
+
+latch.inner: ; preds = %header.inner
+  %tmp5 = load i64, i64* %q1, align 8
+  store i64 %tmp5, i64* %q2, align 8
+  %tmp6 = icmp eq double* %p, %arg
+  br label %header.inner
+
+latch.outer: ; preds = %header.inner
+  store double 0.0, double* %p, align 8
+  %tmp8 = getelementptr inbounds double, double* %tmp, i64 1
+  %tmp9 = icmp eq double* %tmp8, %arg1
+  br i1 %tmp9, label %exit, label %header.outer
+
+exit: ; preds = %latch.outer
+  ret void
+}