diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:10:56 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:10:56 +0000 |
| commit | 044eb2f6afba375a914ac9d8024f8f5142bb912e (patch) | |
| tree | 1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /include/llvm/IR | |
| parent | eb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff) | |
Notes
Diffstat (limited to 'include/llvm/IR')
42 files changed, 2982 insertions, 1177 deletions
diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h index 0cab8bbb8ead..a05a01073049 100644 --- a/include/llvm/IR/Attributes.h +++ b/include/llvm/IR/Attributes.h @@ -225,8 +225,8 @@ public: static AttributeSet get(LLVMContext &C, const AttrBuilder &B); static AttributeSet get(LLVMContext &C, ArrayRef<Attribute> Attrs); - bool operator==(const AttributeSet &O) { return SetNode == O.SetNode; } - bool operator!=(const AttributeSet &O) { return !(*this == O); } + bool operator==(const AttributeSet &O) const { return SetNode == O.SetNode; } + bool operator!=(const AttributeSet &O) const { return !(*this == O); } /// Add an argument attribute. Returns a new set because attribute sets are /// immutable. diff --git a/include/llvm/IR/Attributes.td b/include/llvm/IR/Attributes.td index 616387816bf8..ebe5c1985875 100644 --- a/include/llvm/IR/Attributes.td +++ b/include/llvm/IR/Attributes.td @@ -149,6 +149,9 @@ def StackProtectReq : EnumAttr<"sspreq">; /// Strong Stack protection. def StackProtectStrong : EnumAttr<"sspstrong">; +/// Function was called in a scope requiring strict floating point semantics. +def StrictFP : EnumAttr<"strictfp">; + /// Hidden pointer to structure to return. def StructRet : EnumAttr<"sret">; @@ -161,6 +164,9 @@ def SanitizeThread : EnumAttr<"sanitize_thread">; /// MemorySanitizer is on. def SanitizeMemory : EnumAttr<"sanitize_memory">; +/// HWAddressSanitizer is on. +def SanitizeHWAddress : EnumAttr<"sanitize_hwaddress">; + /// Argument is swift error. 
def SwiftError : EnumAttr<"swifterror">; @@ -182,6 +188,7 @@ def NoInfsFPMath : StrBoolAttr<"no-infs-fp-math">; def NoNansFPMath : StrBoolAttr<"no-nans-fp-math">; def UnsafeFPMath : StrBoolAttr<"unsafe-fp-math">; def NoJumpTables : StrBoolAttr<"no-jump-tables">; +def ProfileSampleAccurate : StrBoolAttr<"profile-sample-accurate">; class CompatRule<string F> { // The name of the function called to check the attribute of the caller and @@ -196,6 +203,7 @@ class CompatRule<string F> { def : CompatRule<"isEqual<SanitizeAddressAttr>">; def : CompatRule<"isEqual<SanitizeThreadAttr>">; def : CompatRule<"isEqual<SanitizeMemoryAttr>">; +def : CompatRule<"isEqual<SanitizeHWAddressAttr>">; def : CompatRule<"isEqual<SafeStackAttr>">; class MergeRule<string F> { @@ -213,6 +221,7 @@ def : MergeRule<"setAND<NoNansFPMathAttr>">; def : MergeRule<"setAND<UnsafeFPMathAttr>">; def : MergeRule<"setOR<NoImplicitFloatAttr>">; def : MergeRule<"setOR<NoJumpTablesAttr>">; +def : MergeRule<"setOR<ProfileSampleAccurateAttr>">; def : MergeRule<"adjustCallerSSPLevel">; def : MergeRule<"adjustCallerStackProbes">; def : MergeRule<"adjustCallerStackProbeSize">; diff --git a/include/llvm/IR/BasicBlock.h b/include/llvm/IR/BasicBlock.h index 6714f2c97473..77cfc9776df0 100644 --- a/include/llvm/IR/BasicBlock.h +++ b/include/llvm/IR/BasicBlock.h @@ -398,6 +398,8 @@ public: /// \brief Return true if it is legal to hoist instructions into this block. bool isLegalToHoistInto() const; + Optional<uint64_t> getIrrLoopHeaderWeight() const; + private: /// \brief Increment the internal refcount of the number of BlockAddresses /// referencing this BasicBlock by \p Amt. 
diff --git a/include/llvm/IR/CallSite.h b/include/llvm/IR/CallSite.h index 96fbebf42c38..5b10da8f2aee 100644 --- a/include/llvm/IR/CallSite.h +++ b/include/llvm/IR/CallSite.h @@ -35,7 +35,6 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -46,6 +45,10 @@ namespace llvm { +namespace Intrinsic { +enum ID : unsigned; +} + template <typename FunTy = const Function, typename BBTy = const BasicBlock, typename ValTy = const Value, @@ -59,7 +62,7 @@ class CallSiteBase { protected: PointerIntPair<InstrTy*, 1, bool> I; - CallSiteBase() : I(nullptr, false) {} + CallSiteBase() = default; CallSiteBase(CallTy *CI) : I(CI, true) { assert(CI); } CallSiteBase(InvokeTy *II) : I(II, false) { assert(II); } explicit CallSiteBase(ValTy *II) { *this = get(II); } @@ -107,12 +110,12 @@ public: /// Return true if the callsite is an indirect call. bool isIndirectCall() const { - Value *V = getCalledValue(); + const Value *V = getCalledValue(); if (!V) return false; if (isa<FunTy>(V) || isa<Constant>(V)) return false; - if (CallInst *CI = dyn_cast<CallInst>(getInstruction())) { + if (const CallInst *CI = dyn_cast<CallInst>(getInstruction())) { if (CI->isInlineAsm()) return false; } @@ -426,6 +429,11 @@ public: CALLSITE_DELEGATE_GETTER(isNoBuiltin()); } + /// Return true if the call requires strict floating point semantics. + bool isStrictFP() const { + CALLSITE_DELEGATE_GETTER(isStrictFP()); + } + /// Return true if the call should not be inlined. bool isNoInline() const { CALLSITE_DELEGATE_GETTER(isNoInline()); @@ -467,6 +475,24 @@ public: CALLSITE_DELEGATE_SETTER(setOnlyAccessesArgMemory()); } + /// Determine if the function may only access memory that is + /// inaccessible from the IR. 
+ bool onlyAccessesInaccessibleMemory() const { + CALLSITE_DELEGATE_GETTER(onlyAccessesInaccessibleMemory()); + } + void setOnlyAccessesInaccessibleMemory() { + CALLSITE_DELEGATE_SETTER(setOnlyAccessesInaccessibleMemory()); + } + + /// Determine if the function may only access memory that is + /// either inaccessible from the IR or pointed to by its arguments. + bool onlyAccessesInaccessibleMemOrArgMem() const { + CALLSITE_DELEGATE_GETTER(onlyAccessesInaccessibleMemOrArgMem()); + } + void setOnlyAccessesInaccessibleMemOrArgMem() { + CALLSITE_DELEGATE_SETTER(setOnlyAccessesInaccessibleMemOrArgMem()); + } + /// Determine if the call cannot return. bool doesNotReturn() const { CALLSITE_DELEGATE_GETTER(doesNotReturn()); @@ -488,7 +514,7 @@ public: CALLSITE_DELEGATE_GETTER(cannotDuplicate()); } void setCannotDuplicate() { - CALLSITE_DELEGATE_GETTER(setCannotDuplicate()); + CALLSITE_DELEGATE_SETTER(setCannotDuplicate()); } /// Determine if the call is convergent. diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h index 850964afc307..84fe836adc35 100644 --- a/include/llvm/IR/CallingConv.h +++ b/include/llvm/IR/CallingConv.h @@ -183,16 +183,18 @@ namespace CallingConv { /// which have an "optimized" convention to preserve registers. AVR_BUILTIN = 86, - /// Calling convention used for Mesa vertex shaders. + /// Calling convention used for Mesa vertex shaders, or AMDPAL last shader + /// stage before rasterization (vertex shader if tessellation and geometry + /// are not in use, or otherwise copy shader if one is needed). AMDGPU_VS = 87, - /// Calling convention used for Mesa geometry shaders. + /// Calling convention used for Mesa/AMDPAL geometry shaders. AMDGPU_GS = 88, - /// Calling convention used for Mesa pixel shaders. + /// Calling convention used for Mesa/AMDPAL pixel shaders. AMDGPU_PS = 89, - /// Calling convention used for Mesa compute shaders. + /// Calling convention used for Mesa/AMDPAL compute shaders. 
AMDGPU_CS = 90, /// Calling convention for AMDGPU code object kernels. @@ -201,14 +203,23 @@ namespace CallingConv { /// Register calling convention used for parameters transfer optimization X86_RegCall = 92, - /// Calling convention used for Mesa hull shaders. (= tessellation control - /// shaders) + /// Calling convention used for Mesa/AMDPAL hull shaders (= tessellation + /// control shaders). AMDGPU_HS = 93, /// Calling convention used for special MSP430 rtlib functions /// which have an "optimized" convention using additional registers. MSP430_BUILTIN = 94, + /// Calling convention used for AMDPAL vertex shader if tessellation is in + /// use. + AMDGPU_LS = 95, + + /// Calling convention used for AMDPAL shader stage before geometry shader + /// if geometry is in use. So either the domain (= tessellation evaluation) + /// shader if tessellation is in use, or otherwise the vertex shader. + AMDGPU_ES = 96, + /// The highest possible calling convention ID. Must be some 2^k - 1. MaxID = 1023 }; diff --git a/include/llvm/IR/Constant.h b/include/llvm/IR/Constant.h index 9daeac6ad6e7..0c94b58a3112 100644 --- a/include/llvm/IR/Constant.h +++ b/include/llvm/IR/Constant.h @@ -117,8 +117,8 @@ public: //// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Value *V) { - return V->getValueID() >= ConstantFirstVal && - V->getValueID() <= ConstantLastVal; + static_assert(ConstantFirstVal == 0, "V->getValueID() >= ConstantFirstVal always succeeds"); + return V->getValueID() <= ConstantLastVal; } /// This method is a special form of User::replaceUsesOfWith diff --git a/include/llvm/IR/ConstantRange.h b/include/llvm/IR/ConstantRange.h index ff6495e7f075..6889e2658244 100644 --- a/include/llvm/IR/ConstantRange.h +++ b/include/llvm/IR/ConstantRange.h @@ -96,9 +96,9 @@ public: /// /// NB! 
The returned set does *not* contain **all** possible values of X for /// which "X BinOpC Y" does not wrap -- some viable values of X may be - /// missing, so you cannot use this to constrain X's range. E.g. in the last - /// example, "(-2) + 1" is both nsw and nuw (so the "X" could be -2), but (-2) - /// is not in the set returned. + /// missing, so you cannot use this to constrain X's range. E.g. in the + /// fourth example, "(-2) + 1" is both nsw and nuw (so the "X" could be -2), + /// but (-2) is not in the set returned. /// /// Examples: /// typedef OverflowingBinaryOperator OBO; @@ -109,6 +109,10 @@ public: /// MGNR(Add, [i8 1, 2), OBO::NoUnsignedWrap | OBO::NoSignedWrap) /// == [0,INT_MAX) /// MGNR(Add, [i8 -1, 6), OBO::NoSignedWrap) == [INT_MIN+1, INT_MAX-4) + /// MGNR(Sub, [i8 1, 2), OBO::NoSignedWrap) == [-127, 128) + /// MGNR(Sub, [i8 1, 2), OBO::NoUnsignedWrap) == [1, 0) + /// MGNR(Sub, [i8 1, 2), OBO::NoUnsignedWrap | OBO::NoSignedWrap) + /// == [1,INT_MAX) static ConstantRange makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp, const ConstantRange &Other, unsigned NoWrapKind); @@ -313,6 +317,10 @@ public: /// logical right shift of a value in this range and a value in \p Other. ConstantRange lshr(const ConstantRange &Other) const; + /// Return a new range representing the possible values resulting from a + /// arithmetic right shift of a value in this range and a value in \p Other. + ConstantRange ashr(const ConstantRange &Other) const; + /// Return a new range that is the logical not of the current set. ConstantRange inverse() const; diff --git a/include/llvm/IR/DIBuilder.h b/include/llvm/IR/DIBuilder.h index 6a14f783005d..3c2074dfe788 100644 --- a/include/llvm/IR/DIBuilder.h +++ b/include/llvm/IR/DIBuilder.h @@ -74,6 +74,17 @@ namespace llvm { /// Create an \a temporary node and track it in \a UnresolvedNodes. void trackIfUnresolved(MDNode *N); + /// Internal helper for insertDeclare. 
+ Instruction *insertDeclare(llvm::Value *Storage, DILocalVariable *VarInfo, + DIExpression *Expr, const DILocation *DL, + BasicBlock *InsertBB, Instruction *InsertBefore); + + /// Internal helper for insertDbgValueIntrinsic. + Instruction * + insertDbgValueIntrinsic(llvm::Value *Val, DILocalVariable *VarInfo, + DIExpression *Expr, const DILocation *DL, + BasicBlock *InsertBB, Instruction *InsertBefore); + public: /// Construct a builder for a module. /// @@ -112,6 +123,8 @@ namespace llvm { /// \param SplitDebugInlining Whether to emit inline debug info. /// \param DebugInfoForProfiling Whether to emit extra debug info for /// profile collection. + /// \param GnuPubnames Whether to emit .debug_gnu_pubnames section instead + /// of .debug_pubnames. DICompileUnit * createCompileUnit(unsigned Lang, DIFile *File, StringRef Producer, bool isOptimized, StringRef Flags, unsigned RV, @@ -119,7 +132,8 @@ namespace llvm { DICompileUnit::DebugEmissionKind Kind = DICompileUnit::DebugEmissionKind::FullDebug, uint64_t DWOId = 0, bool SplitDebugInlining = true, - bool DebugInfoForProfiling = false); + bool DebugInfoForProfiling = false, + bool GnuPubnames = false); /// Create a file descriptor to hold debugging information for a file. /// \param Filename File name. @@ -551,14 +565,6 @@ namespace llvm { DIExpression *createExpression(ArrayRef<uint64_t> Addr = None); DIExpression *createExpression(ArrayRef<int64_t> Addr); - /// Create a descriptor to describe one part - /// of aggregate variable that is fragmented across multiple Values. - /// - /// \param OffsetInBits Offset of the piece in bits. - /// \param SizeInBits Size of the piece in bits. - DIExpression *createFragmentExpression(unsigned OffsetInBits, - unsigned SizeInBits); - /// Create an expression for a variable that does not have an address, but /// does have a constant value. 
DIExpression *createConstantValueExpression(uint64_t Val) { @@ -729,12 +735,11 @@ namespace llvm { /// Insert a new llvm.dbg.value intrinsic call. /// \param Val llvm::Value of the variable - /// \param Offset Offset /// \param VarInfo Variable's debug info descriptor. /// \param Expr A complex location expression. /// \param DL Debug info location. /// \param InsertAtEnd Location for the new intrinsic. - Instruction *insertDbgValueIntrinsic(llvm::Value *Val, uint64_t Offset, + Instruction *insertDbgValueIntrinsic(llvm::Value *Val, DILocalVariable *VarInfo, DIExpression *Expr, const DILocation *DL, @@ -742,23 +747,22 @@ namespace llvm { /// Insert a new llvm.dbg.value intrinsic call. /// \param Val llvm::Value of the variable - /// \param Offset Offset /// \param VarInfo Variable's debug info descriptor. /// \param Expr A complex location expression. /// \param DL Debug info location. /// \param InsertBefore Location for the new intrinsic. - Instruction *insertDbgValueIntrinsic(llvm::Value *Val, uint64_t Offset, + Instruction *insertDbgValueIntrinsic(llvm::Value *Val, DILocalVariable *VarInfo, DIExpression *Expr, const DILocation *DL, Instruction *InsertBefore); - /// Replace the vtable holder in the given composite type. + /// Replace the vtable holder in the given type. /// /// If this creates a self reference, it may orphan some unresolved cycles /// in the operands of \c T, so \a DIBuilder needs to track that. void replaceVTableHolder(DICompositeType *&T, - DICompositeType *VTableHolder); + DIType *VTableHolder); /// Replace arrays on a composite type. /// diff --git a/include/llvm/IR/DataLayout.h b/include/llvm/IR/DataLayout.h index daf8f8da689d..a6c71a5a2c3e 100644 --- a/include/llvm/IR/DataLayout.h +++ b/include/llvm/IR/DataLayout.h @@ -309,9 +309,7 @@ public: } /// Layout pointer alignment - /// FIXME: The defaults need to be removed once all of - /// the backends/clients are updated. 
- unsigned getPointerABIAlignment(unsigned AS = 0) const; + unsigned getPointerABIAlignment(unsigned AS) const; /// Return target's alignment for stack-based pointers /// FIXME: The defaults need to be removed once all of diff --git a/include/llvm/IR/DebugInfoMetadata.h b/include/llvm/IR/DebugInfoMetadata.h index 678a43ae7926..75b0c43b6512 100644 --- a/include/llvm/IR/DebugInfoMetadata.h +++ b/include/llvm/IR/DebugInfoMetadata.h @@ -473,10 +473,12 @@ class DIFile : public DIScope { friend class MDNode; public: + // These values must be explictly set, as they end up in the final object + // file. enum ChecksumKind { - CSK_None, - CSK_MD5, - CSK_SHA1, + CSK_None = 0, + CSK_MD5 = 1, + CSK_SHA1 = 2, CSK_Last = CSK_SHA1 // Should be last enumeration. }; @@ -510,7 +512,7 @@ public: ChecksumKind CSK = CSK_None, StringRef CS = StringRef()), (Filename, Directory, CSK, CS)) - DEFINE_MDNODE_GET(DIFile, (MDString *Filename, MDString *Directory, + DEFINE_MDNODE_GET(DIFile, (MDString * Filename, MDString *Directory, ChecksumKind CSK = CSK_None, MDString *CS = nullptr), (Filename, Directory, CSK, CS)) @@ -1068,16 +1070,17 @@ private: uint64_t DWOId; bool SplitDebugInlining; bool DebugInfoForProfiling; + bool GnuPubnames; DICompileUnit(LLVMContext &C, StorageType Storage, unsigned SourceLanguage, bool IsOptimized, unsigned RuntimeVersion, unsigned EmissionKind, uint64_t DWOId, bool SplitDebugInlining, - bool DebugInfoForProfiling, ArrayRef<Metadata *> Ops) + bool DebugInfoForProfiling, bool GnuPubnames, ArrayRef<Metadata *> Ops) : DIScope(C, DICompileUnitKind, Storage, dwarf::DW_TAG_compile_unit, Ops), SourceLanguage(SourceLanguage), IsOptimized(IsOptimized), RuntimeVersion(RuntimeVersion), EmissionKind(EmissionKind), DWOId(DWOId), SplitDebugInlining(SplitDebugInlining), - DebugInfoForProfiling(DebugInfoForProfiling) { + DebugInfoForProfiling(DebugInfoForProfiling), GnuPubnames(GnuPubnames) { assert(Storage != Uniqued); } ~DICompileUnit() = default; @@ -1091,15 +1094,14 @@ 
private: DIGlobalVariableExpressionArray GlobalVariables, DIImportedEntityArray ImportedEntities, DIMacroNodeArray Macros, uint64_t DWOId, bool SplitDebugInlining, bool DebugInfoForProfiling, - StorageType Storage, bool ShouldCreate = true) { - return getImpl(Context, SourceLanguage, File, - getCanonicalMDString(Context, Producer), IsOptimized, - getCanonicalMDString(Context, Flags), RuntimeVersion, - getCanonicalMDString(Context, SplitDebugFilename), - EmissionKind, EnumTypes.get(), RetainedTypes.get(), - GlobalVariables.get(), ImportedEntities.get(), Macros.get(), - DWOId, SplitDebugInlining, DebugInfoForProfiling, Storage, - ShouldCreate); + bool GnuPubnames, StorageType Storage, bool ShouldCreate = true) { + return getImpl( + Context, SourceLanguage, File, getCanonicalMDString(Context, Producer), + IsOptimized, getCanonicalMDString(Context, Flags), RuntimeVersion, + getCanonicalMDString(Context, SplitDebugFilename), EmissionKind, + EnumTypes.get(), RetainedTypes.get(), GlobalVariables.get(), + ImportedEntities.get(), Macros.get(), DWOId, SplitDebugInlining, + DebugInfoForProfiling, GnuPubnames, Storage, ShouldCreate); } static DICompileUnit * getImpl(LLVMContext &Context, unsigned SourceLanguage, Metadata *File, @@ -1108,7 +1110,7 @@ private: unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes, Metadata *GlobalVariables, Metadata *ImportedEntities, Metadata *Macros, uint64_t DWOId, bool SplitDebugInlining, - bool DebugInfoForProfiling, StorageType Storage, + bool DebugInfoForProfiling, bool GnuPubnames, StorageType Storage, bool ShouldCreate = true); TempDICompileUnit cloneImpl() const { @@ -1118,7 +1120,7 @@ private: getEmissionKind(), getEnumTypes(), getRetainedTypes(), getGlobalVariables(), getImportedEntities(), getMacros(), DWOId, getSplitDebugInlining(), - getDebugInfoForProfiling()); + getDebugInfoForProfiling(), getGnuPubnames()); } public: @@ -1133,11 +1135,12 @@ public: DICompositeTypeArray EnumTypes, DIScopeArray RetainedTypes, 
DIGlobalVariableExpressionArray GlobalVariables, DIImportedEntityArray ImportedEntities, DIMacroNodeArray Macros, - uint64_t DWOId, bool SplitDebugInlining, bool DebugInfoForProfiling), + uint64_t DWOId, bool SplitDebugInlining, bool DebugInfoForProfiling, + bool GnuPubnames), (SourceLanguage, File, Producer, IsOptimized, Flags, RuntimeVersion, SplitDebugFilename, EmissionKind, EnumTypes, RetainedTypes, GlobalVariables, ImportedEntities, Macros, DWOId, SplitDebugInlining, - DebugInfoForProfiling)) + DebugInfoForProfiling, GnuPubnames)) DEFINE_MDNODE_GET_DISTINCT_TEMPORARY( DICompileUnit, (unsigned SourceLanguage, Metadata *File, MDString *Producer, @@ -1145,11 +1148,11 @@ public: MDString *SplitDebugFilename, unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes, Metadata *GlobalVariables, Metadata *ImportedEntities, Metadata *Macros, uint64_t DWOId, - bool SplitDebugInlining, bool DebugInfoForProfiling), + bool SplitDebugInlining, bool DebugInfoForProfiling, bool GnuPubnames), (SourceLanguage, File, Producer, IsOptimized, Flags, RuntimeVersion, SplitDebugFilename, EmissionKind, EnumTypes, RetainedTypes, GlobalVariables, ImportedEntities, Macros, DWOId, SplitDebugInlining, - DebugInfoForProfiling)) + DebugInfoForProfiling, GnuPubnames)) TempDICompileUnit clone() const { return cloneImpl(); } @@ -1160,6 +1163,7 @@ public: return (DebugEmissionKind)EmissionKind; } bool getDebugInfoForProfiling() const { return DebugInfoForProfiling; } + bool getGnuPubnames() const { return GnuPubnames; } StringRef getProducer() const { return getStringOperand(1); } StringRef getFlags() const { return getStringOperand(2); } StringRef getSplitDebugFilename() const { return getStringOperand(3); } @@ -1413,17 +1417,17 @@ public: /// could create a location with a new discriminator. If they are from /// different files/lines the location is ambiguous and can't be /// represented in a single line entry. In this case, no location - /// should be set. 
+ /// should be set, unless the merged instruction is a call, which we will + /// set the merged debug location as line 0 of the nearest common scope + /// where 2 locations are inlined from. This only applies to Instruction; + /// for MachineInstruction, as it is post-inline, we will treat the call + /// instruction the same way as other instructions. /// - /// Currently the function does not create a new location. If the locations - /// are the same, or cannot be discriminated, the first location is returned. - /// Otherwise an empty location will be used. - static const DILocation *getMergedLocation(const DILocation *LocA, - const DILocation *LocB) { - if (LocA && LocB && (LocA == LocB || !LocA->canDiscriminate(*LocB))) - return LocA; - return nullptr; - } + /// \p ForInst: The Instruction the merged DILocation is for. If the + /// Instruction is unavailable or non-existent, use nullptr. + static const DILocation * + getMergedLocation(const DILocation *LocA, const DILocation *LocB, + const Instruction *ForInst = nullptr); /// Returns the base discriminator for a given encoded discriminator \p D. static unsigned getBaseDiscriminatorFromDiscriminator(unsigned D) { @@ -2087,6 +2091,8 @@ public: DITypeRef getType() const { return DITypeRef(getRawType()); } uint32_t getAlignInBits() const { return AlignInBits; } uint32_t getAlignInBytes() const { return getAlignInBits() / CHAR_BIT; } + /// Determines the size of the variable's type. + Optional<uint64_t> getSizeInBits() const; StringRef getFilename() const { if (auto *F = getFile()) @@ -2291,8 +2297,23 @@ public: /// Prepend \p DIExpr with a deref and offset operation and optionally turn it /// into a stack value. 
- static DIExpression *prepend(const DIExpression *DIExpr, bool Deref, - int64_t Offset = 0, bool StackValue = false); + static DIExpression *prepend(const DIExpression *DIExpr, bool DerefBefore, + int64_t Offset = 0, bool DerefAfter = false, + bool StackValue = false); + + /// Create a DIExpression to describe one part of an aggregate variable that + /// is fragmented across multiple Values. The DW_OP_LLVM_fragment operation + /// will be appended to the elements of \c Expr. If \c Expr already contains + /// a \c DW_OP_LLVM_fragment \c OffsetInBits is interpreted as an offset + /// into the existing fragment. + /// + /// \param OffsetInBits Offset of the piece in bits. + /// \param SizeInBits Size of the piece in bits. + /// \return Creating a fragment expression may fail if \c Expr + /// contains arithmetic operations that would be truncated. + static Optional<DIExpression *> + createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, + unsigned SizeInBits); }; /// Global variables. @@ -2630,7 +2651,7 @@ public: Metadata *getRawExpression() const { return getOperand(1); } DIExpression *getExpression() const { - return cast_or_null<DIExpression>(getRawExpression()); + return cast<DIExpression>(getRawExpression()); } static bool classof(const Metadata *MD) { diff --git a/include/llvm/IR/DiagnosticHandler.h b/include/llvm/IR/DiagnosticHandler.h new file mode 100644 index 000000000000..9256d4850df1 --- /dev/null +++ b/include/llvm/IR/DiagnosticHandler.h @@ -0,0 +1,75 @@ +//===- DiagnosticHandler.h - DiagnosticHandler class for LLVM -*- C++ ---*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Base DiagnosticHandler class declaration. Derive from this class to provide +// custom diagnostic reporting. 
+//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_DIAGNOSTICHANDLER_H +#define LLVM_IR_DIAGNOSTICHANDLER_H + +#include "llvm/ADT/StringRef.h" + +namespace llvm { +class DiagnosticInfo; + +/// \brief This is the base class for diagnostic handling in LLVM. +/// The handleDiagnostics method must be overriden by the subclasses to handle +/// diagnostic. The *RemarkEnabled methods can be overriden to control +/// which remarks are enabled. +struct DiagnosticHandler { + void *DiagnosticContext = nullptr; + DiagnosticHandler(void *DiagContext = nullptr) + : DiagnosticContext(DiagContext) {} + virtual ~DiagnosticHandler() = default; + + using DiagnosticHandlerTy = void (*)(const DiagnosticInfo &DI, void *Context); + + /// DiagHandlerCallback is settable from the C API and base implementation + /// of DiagnosticHandler will call it from handleDiagnostics(). Any derived + /// class of DiagnosticHandler should not use callback but + /// implement handleDiagnostics(). + DiagnosticHandlerTy DiagHandlerCallback = nullptr; + + /// Override handleDiagnostics to provide custom implementation. + /// Return true if it handles diagnostics reporting properly otherwise + /// return false to make LLVMContext::diagnose() to print the message + /// with a prefix based on the severity. + virtual bool handleDiagnostics(const DiagnosticInfo &DI) { + if (DiagHandlerCallback) { + DiagHandlerCallback(DI, DiagnosticContext); + return true; + } + return false; + } + + /// Return true if analysis remarks are enabled, override + /// to provide different implementation. + virtual bool isAnalysisRemarkEnabled(StringRef PassName) const; + + /// Return true if missed optimization remarks are enabled, override + /// to provide different implementation. + virtual bool isMissedOptRemarkEnabled(StringRef PassName) const; + + /// Return true if passed optimization remarks are enabled, override + /// to provide different implementation. 
+ virtual bool isPassedOptRemarkEnabled(StringRef PassName) const; + + /// Return true if any type of remarks are enabled for this pass. + bool isAnyRemarkEnabled(StringRef PassName) const { + return (isMissedOptRemarkEnabled(PassName) || + isPassedOptRemarkEnabled(PassName) || + isAnalysisRemarkEnabled(PassName)); + } + + /// Return true if any type of remarks are enabled for any pass. + virtual bool isAnyRemarkEnabled() const; +}; +} // namespace llvm + +#endif // LLVM_IR_DIAGNOSTICHANDLER_H diff --git a/include/llvm/IR/DiagnosticInfo.h b/include/llvm/IR/DiagnosticInfo.h index 15d332577113..020b67d6b711 100644 --- a/include/llvm/IR/DiagnosticInfo.h +++ b/include/llvm/IR/DiagnosticInfo.h @@ -188,17 +188,13 @@ private: public: /// \p The function that is concerned by this stack size diagnostic. /// \p The computed stack size. - DiagnosticInfoResourceLimit(const Function &Fn, - const char *ResourceName, + DiagnosticInfoResourceLimit(const Function &Fn, const char *ResourceName, uint64_t ResourceSize, DiagnosticSeverity Severity = DS_Warning, DiagnosticKind Kind = DK_ResourceLimit, uint64_t ResourceLimit = 0) - : DiagnosticInfo(Kind, Severity), - Fn(Fn), - ResourceName(ResourceName), - ResourceSize(ResourceSize), - ResourceLimit(ResourceLimit) {} + : DiagnosticInfo(Kind, Severity), Fn(Fn), ResourceName(ResourceName), + ResourceSize(ResourceSize), ResourceLimit(ResourceLimit) {} const Function &getFunction() const { return Fn; } const char *getResourceName() const { return ResourceName; } @@ -209,19 +205,17 @@ public: void print(DiagnosticPrinter &DP) const override; static bool classof(const DiagnosticInfo *DI) { - return DI->getKind() == DK_ResourceLimit || - DI->getKind() == DK_StackSize; + return DI->getKind() == DK_ResourceLimit || DI->getKind() == DK_StackSize; } }; class DiagnosticInfoStackSize : public DiagnosticInfoResourceLimit { public: - DiagnosticInfoStackSize(const Function &Fn, - uint64_t StackSize, + DiagnosticInfoStackSize(const Function &Fn, uint64_t 
StackSize, DiagnosticSeverity Severity = DS_Warning, uint64_t StackLimit = 0) - : DiagnosticInfoResourceLimit(Fn, "stack size", StackSize, - Severity, DK_StackSize, StackLimit) {} + : DiagnosticInfoResourceLimit(Fn, "stack size", StackSize, Severity, + DK_StackSize, StackLimit) {} uint64_t getStackSize() const { return getResourceSize(); } uint64_t getStackLimit() const { return getResourceLimit(); } @@ -244,7 +238,7 @@ public: /// \p The module that is concerned by this debug metadata version diagnostic. /// \p The actual metadata version. DiagnosticInfoDebugMetadataVersion(const Module &M, unsigned MetadataVersion, - DiagnosticSeverity Severity = DS_Warning) + DiagnosticSeverity Severity = DS_Warning) : DiagnosticInfo(DK_DebugMetadataVersion, Severity), M(M), MetadataVersion(MetadataVersion) {} @@ -411,7 +405,7 @@ public: /// \brief Used in the streaming interface as the general argument type. It /// internally converts everything into a key-value pair. struct Argument { - StringRef Key; + std::string Key; std::string Val; // If set, the debug location corresponding to the value. DiagnosticLocation Loc; @@ -419,9 +413,15 @@ public: explicit Argument(StringRef Str = "") : Key("String"), Val(Str) {} Argument(StringRef Key, const Value *V); Argument(StringRef Key, const Type *T); + Argument(StringRef Key, StringRef S); Argument(StringRef Key, int N); + Argument(StringRef Key, long N); + Argument(StringRef Key, long long N); Argument(StringRef Key, unsigned N); + Argument(StringRef Key, unsigned long N); + Argument(StringRef Key, unsigned long long N); Argument(StringRef Key, bool B) : Key(Key), Val(B ? "true" : "false") {} + Argument(StringRef Key, DebugLoc dl); }; /// \p PassName is the name of the pass emitting this diagnostic. 
\p @@ -438,10 +438,10 @@ public: : DiagnosticInfoWithLocationBase(Kind, Severity, Fn, Loc), PassName(PassName), RemarkName(RemarkName) {} - DiagnosticInfoOptimizationBase &operator<<(StringRef S); - DiagnosticInfoOptimizationBase &operator<<(Argument A); - DiagnosticInfoOptimizationBase &operator<<(setIsVerbose V); - DiagnosticInfoOptimizationBase &operator<<(setExtraArgs EA); + void insert(StringRef S); + void insert(Argument A); + void insert(setIsVerbose V); + void insert(setExtraArgs EA); /// \see DiagnosticInfo::print. void print(DiagnosticPrinter &DP) const override; @@ -511,6 +511,81 @@ protected: friend struct yaml::MappingTraits<DiagnosticInfoOptimizationBase *>; }; +/// Allow the insertion operator to return the actual remark type rather than a +/// common base class. This allows returning the result of the insertion +/// directly by value, e.g. return OptimizationRemarkAnalysis(...) << "blah". +template <class RemarkT> +RemarkT & +operator<<(RemarkT &R, + typename std::enable_if< + std::is_base_of<DiagnosticInfoOptimizationBase, RemarkT>::value, + StringRef>::type S) { + R.insert(S); + return R; +} + +/// Also allow r-value for the remark to allow insertion into a +/// temporarily-constructed remark. 
+template <class RemarkT> +RemarkT & +operator<<(RemarkT &&R, + typename std::enable_if< + std::is_base_of<DiagnosticInfoOptimizationBase, RemarkT>::value, + StringRef>::type S) { + R.insert(S); + return R; +} + +template <class RemarkT> +RemarkT & +operator<<(RemarkT &R, + typename std::enable_if< + std::is_base_of<DiagnosticInfoOptimizationBase, RemarkT>::value, + DiagnosticInfoOptimizationBase::Argument>::type A) { + R.insert(A); + return R; +} + +template <class RemarkT> +RemarkT & +operator<<(RemarkT &&R, + typename std::enable_if< + std::is_base_of<DiagnosticInfoOptimizationBase, RemarkT>::value, + DiagnosticInfoOptimizationBase::Argument>::type A) { + R.insert(A); + return R; +} + +template <class RemarkT> +RemarkT & +operator<<(RemarkT &R, + typename std::enable_if< + std::is_base_of<DiagnosticInfoOptimizationBase, RemarkT>::value, + DiagnosticInfoOptimizationBase::setIsVerbose>::type V) { + R.insert(V); + return R; +} + +template <class RemarkT> +RemarkT & +operator<<(RemarkT &&R, + typename std::enable_if< + std::is_base_of<DiagnosticInfoOptimizationBase, RemarkT>::value, + DiagnosticInfoOptimizationBase::setIsVerbose>::type V) { + R.insert(V); + return R; +} + +template <class RemarkT> +RemarkT & +operator<<(RemarkT &R, + typename std::enable_if< + std::is_base_of<DiagnosticInfoOptimizationBase, RemarkT>::value, + DiagnosticInfoOptimizationBase::setExtraArgs>::type EA) { + R.insert(EA); + return R; +} + /// \brief Common features for diagnostics dealing with optimization remarks /// that are used by IR passes. class DiagnosticInfoIROptimization : public DiagnosticInfoOptimizationBase { @@ -604,10 +679,8 @@ public: return DI->getKind() == DK_OptimizationRemark; } - static bool isEnabled(StringRef PassName); - /// \see DiagnosticInfoOptimizationBase::isEnabled. - bool isEnabled() const override { return isEnabled(getPassName()); } + bool isEnabled() const override; private: /// This is deprecated now and only used by the function API below. 
@@ -623,11 +696,6 @@ private: const DiagnosticLocation &Loc, const Twine &Msg) : DiagnosticInfoIROptimization(DK_OptimizationRemark, DS_Remark, PassName, Fn, Loc, Msg) {} - - friend void emitOptimizationRemark(LLVMContext &Ctx, const char *PassName, - const Function &Fn, - const DiagnosticLocation &Loc, - const Twine &Msg); }; /// Diagnostic information for missed-optimization remarks. @@ -652,10 +720,8 @@ public: return DI->getKind() == DK_OptimizationRemarkMissed; } - static bool isEnabled(StringRef PassName); - /// \see DiagnosticInfoOptimizationBase::isEnabled. - bool isEnabled() const override { return isEnabled(getPassName()); } + bool isEnabled() const override; private: /// This is deprecated now and only used by the function API below. @@ -671,12 +737,6 @@ private: const DiagnosticLocation &Loc, const Twine &Msg) : DiagnosticInfoIROptimization(DK_OptimizationRemarkMissed, DS_Remark, PassName, Fn, Loc, Msg) {} - - friend void emitOptimizationRemarkMissed(LLVMContext &Ctx, - const char *PassName, - const Function &Fn, - const DiagnosticLocation &Loc, - const Twine &Msg); }; /// Diagnostic information for optimization analysis remarks. @@ -712,12 +772,8 @@ public: return DI->getKind() == DK_OptimizationRemarkAnalysis; } - static bool isEnabled(StringRef PassName); - /// \see DiagnosticInfoOptimizationBase::isEnabled. 
- bool isEnabled() const override { - return shouldAlwaysPrint() || isEnabled(getPassName()); - } + bool isEnabled() const override; static const char *AlwaysPrint; @@ -748,12 +804,6 @@ private: const DiagnosticLocation &Loc, const Twine &Msg) : DiagnosticInfoIROptimization(DK_OptimizationRemarkAnalysis, DS_Remark, PassName, Fn, Loc, Msg) {} - - friend void emitOptimizationRemarkAnalysis(LLVMContext &Ctx, - const char *PassName, - const Function &Fn, - const DiagnosticLocation &Loc, - const Twine &Msg); }; /// Diagnostic information for optimization analysis remarks related to @@ -795,10 +845,6 @@ private: const Twine &Msg) : OptimizationRemarkAnalysis(DK_OptimizationRemarkAnalysisFPCommute, PassName, Fn, Loc, Msg) {} - - friend void emitOptimizationRemarkAnalysisFPCommute( - LLVMContext &Ctx, const char *PassName, const Function &Fn, - const DiagnosticLocation &Loc, const Twine &Msg); }; /// Diagnostic information for optimization analysis remarks related to @@ -839,10 +885,6 @@ private: const Twine &Msg) : OptimizationRemarkAnalysis(DK_OptimizationRemarkAnalysisAliasing, PassName, Fn, Loc, Msg) {} - - friend void emitOptimizationRemarkAnalysisAliasing( - LLVMContext &Ctx, const char *PassName, const Function &Fn, - const DiagnosticLocation &Loc, const Twine &Msg); }; /// Diagnostic information for machine IR parser. @@ -885,74 +927,6 @@ public: // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_SIMPLE_CONVERSION_FUNCTIONS(DiagnosticInfo, LLVMDiagnosticInfoRef) -/// \brief Legacy interface to emit an optimization-applied message. Use -/// (Machine)OptimizationRemarkEmitter instead. -/// -/// \p PassName is the name of the pass emitting the message. If -Rpass= is -/// given and \p PassName matches the regular expression in -Rpass, then the -/// remark will be emitted. \p Fn is the function triggering the remark, \p Loc -/// is the debug location where the diagnostic is generated. \p Msg is the -/// message string to use. 
-void emitOptimizationRemark(LLVMContext &Ctx, const char *PassName, - const Function &Fn, const DiagnosticLocation &Loc, - const Twine &Msg); - -/// \brief Legacy interface to emit an optimization-missed message. Use -/// (Machine)OptimizationRemarkEmitter instead. -/// -/// \p PassName is the name of the pass emitting the message. If -Rpass-missed= -/// is given and \p PassName matches the regular expression in -Rpass, then the -/// remark will be emitted. \p Fn is the function triggering the remark, \p Loc -/// is the debug location where the diagnostic is generated. \p Msg is the -/// message string to use. -void emitOptimizationRemarkMissed(LLVMContext &Ctx, const char *PassName, - const Function &Fn, - const DiagnosticLocation &Loc, - const Twine &Msg); - -/// \brief Legacy interface to emit an optimization analysis remark message. -/// Use (Machine)OptimizationRemarkEmitter instead. -/// -/// \p PassName is the name of the pass emitting the message. If -/// -Rpass-analysis= is given and \p PassName matches the regular expression in -/// -Rpass, then the remark will be emitted. \p Fn is the function triggering -/// the remark, \p Loc is the debug location where the diagnostic is -/// generated. \p Msg is the message string to use. -void emitOptimizationRemarkAnalysis(LLVMContext &Ctx, const char *PassName, - const Function &Fn, - const DiagnosticLocation &Loc, - const Twine &Msg); - -/// \brief Legacy interface to emit an optimization analysis remark related to -/// messages about floating-point non-commutativity. Use -/// (Machine)OptimizationRemarkEmitter instead. -/// -/// \p PassName is the name of the pass emitting the message. If -/// -Rpass-analysis= is given and \p PassName matches the regular expression in -/// -Rpass, then the remark will be emitted. \p Fn is the function triggering -/// the remark, \p Loc is the debug location where the diagnostic is -/// generated. \p Msg is the message string to use. 
-void emitOptimizationRemarkAnalysisFPCommute(LLVMContext &Ctx, - const char *PassName, - const Function &Fn, - const DiagnosticLocation &Loc, - const Twine &Msg); - -/// \brief Legacy interface to emit an optimization analysis remark related to -/// messages about pointer aliasing. Use (Machine)OptimizationRemarkEmitter -/// instead. -/// -/// \p PassName is the name of the pass emitting the message. -/// If -Rpass-analysis= is given and \p PassName matches the regular expression -/// in -Rpass, then the remark will be emitted. \p Fn is the function triggering -/// the remark, \p Loc is the debug location where the diagnostic is generated. -/// \p Msg is the message string to use. -void emitOptimizationRemarkAnalysisAliasing(LLVMContext &Ctx, - const char *PassName, - const Function &Fn, - const DiagnosticLocation &Loc, - const Twine &Msg); - /// Diagnostic information for optimization failures. class DiagnosticInfoOptimizationFailure : public DiagnosticInfoIROptimization { public: @@ -1013,6 +987,12 @@ public: void print(DiagnosticPrinter &DP) const override; }; +namespace yaml { +template <> struct MappingTraits<DiagnosticInfoOptimizationBase *> { + static void mapping(IO &io, DiagnosticInfoOptimizationBase *&OptDiag); +}; +} // namespace yaml + } // end namespace llvm #endif // LLVM_IR_DIAGNOSTICINFO_H diff --git a/include/llvm/IR/Dominators.h b/include/llvm/IR/Dominators.h index 5b21a2c83e4a..6ad99e516fba 100644 --- a/include/llvm/IR/Dominators.h +++ b/include/llvm/IR/Dominators.h @@ -41,9 +41,12 @@ namespace DomTreeBuilder { using BBDomTree = DomTreeBase<BasicBlock>; using BBPostDomTree = PostDomTreeBase<BasicBlock>; -extern template void Calculate<BBDomTree, Function>(BBDomTree &DT, Function &F); -extern template void Calculate<BBPostDomTree, Function>(BBPostDomTree &DT, - Function &F); +extern template struct Update<BasicBlock *>; + +using BBUpdates = ArrayRef<Update<BasicBlock *>>; + +extern template void Calculate<BBDomTree>(BBDomTree &DT); +extern 
template void Calculate<BBPostDomTree>(BBPostDomTree &DT); extern template void InsertEdge<BBDomTree>(BBDomTree &DT, BasicBlock *From, BasicBlock *To); @@ -57,6 +60,9 @@ extern template void DeleteEdge<BBPostDomTree>(BBPostDomTree &DT, BasicBlock *From, BasicBlock *To); +extern template void ApplyUpdates<BBDomTree>(BBDomTree &DT, BBUpdates); +extern template void ApplyUpdates<BBPostDomTree>(BBPostDomTree &DT, BBUpdates); + extern template bool Verify<BBDomTree>(const BBDomTree &DT); extern template bool Verify<BBPostDomTree>(const BBPostDomTree &DT); } // namespace DomTreeBuilder diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h index 75fccc135dae..e811ae5e215a 100644 --- a/include/llvm/IR/Function.h +++ b/include/llvm/IR/Function.h @@ -30,7 +30,6 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalObject.h" #include "llvm/IR/GlobalValue.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/OperandTraits.h" #include "llvm/IR/SymbolTableListTraits.h" #include "llvm/IR/Value.h" @@ -44,6 +43,10 @@ namespace llvm { +namespace Intrinsic { +enum ID : unsigned; +} + class AssemblyAnnotationWriter; class Constant; class DISubprogram; @@ -125,6 +128,11 @@ public: void operator=(const Function&) = delete; ~Function(); + // This is here to help easily convert from FunctionT * (Function * or + // MachineFunction *) in BlockFrequencyInfoImpl to Function * by calling + // FunctionT->getFunction(). + const Function &getFunction() const { return *this; } + static Function *Create(FunctionType *Ty, LinkageTypes Linkage, const Twine &N = "", Module *M = nullptr) { return new Function(Ty, Linkage, N, M); @@ -414,7 +422,7 @@ public: } void setOnlyAccessesArgMemory() { addFnAttr(Attribute::ArgMemOnly); } - /// @brief Determine if the function may only access memory that is + /// @brief Determine if the function may only access memory that is /// inaccessible from the IR. 
bool onlyAccessesInaccessibleMemory() const { return hasFnAttribute(Attribute::InaccessibleMemOnly); @@ -482,7 +490,7 @@ public: } void setDoesNotRecurse() { addFnAttr(Attribute::NoRecurse); - } + } /// @brief True if the ABI mandates (or the user requested) that this /// function be in a unwind table. diff --git a/include/llvm/IR/GlobalValue.h b/include/llvm/IR/GlobalValue.h index d65d43cc5957..1793de7887fc 100644 --- a/include/llvm/IR/GlobalValue.h +++ b/include/llvm/IR/GlobalValue.h @@ -80,13 +80,14 @@ protected: ValueType(Ty), Linkage(Linkage), Visibility(DefaultVisibility), UnnamedAddrVal(unsigned(UnnamedAddr::None)), DllStorageClass(DefaultStorageClass), ThreadLocal(NotThreadLocal), - HasLLVMReservedName(false), IntID((Intrinsic::ID)0U), Parent(nullptr) { + HasLLVMReservedName(false), IsDSOLocal(false), + IntID((Intrinsic::ID)0U), Parent(nullptr) { setName(Name); } Type *ValueType; - static const unsigned GlobalValueSubClassDataBits = 18; + static const unsigned GlobalValueSubClassDataBits = 17; // All bitfields use unsigned as the underlying type so that MSVC will pack // them. @@ -103,11 +104,15 @@ protected: /// Function::intrinsicID() returns Intrinsic::not_intrinsic. unsigned HasLLVMReservedName : 1; + /// If true then there is a definition within the same linkage unit and that + /// definition cannot be runtime preempted. + unsigned IsDSOLocal : 1; + private: friend class Constant; // Give subclasses access to what otherwise would be wasted padding. - // (18 + 4 + 2 + 2 + 2 + 3 + 1) == 32. + // (17 + 4 + 2 + 2 + 2 + 3 + 1 + 1) == 32. unsigned SubClassData : GlobalValueSubClassDataBits; void destroyConstantImpl(); @@ -261,6 +266,12 @@ public: Type *getValueType() const { return ValueType; } + void setDSOLocal(bool Local) { IsDSOLocal = Local; } + + bool isDSOLocal() const { + return IsDSOLocal; + } + static LinkageTypes getLinkOnceLinkage(bool ODR) { return ODR ? 
LinkOnceODRLinkage : LinkOnceAnyLinkage; } diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h index 5344a93efb33..e687ca689d46 100644 --- a/include/llvm/IR/IRBuilder.h +++ b/include/llvm/IR/IRBuilder.h @@ -438,22 +438,26 @@ public: /// \brief Create and insert an element unordered-atomic memcpy between the /// specified pointers. /// + /// DstAlign/SrcAlign are the alignments of the Dst/Src pointers, respectively. + /// /// If the pointers aren't i8*, they will be converted. If a TBAA tag is /// specified, it will be added to the instruction. Likewise with alias.scope /// and noalias tags. CallInst *CreateElementUnorderedAtomicMemCpy( - Value *Dst, Value *Src, uint64_t Size, uint32_t ElementSize, - MDNode *TBAATag = nullptr, MDNode *TBAAStructTag = nullptr, - MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr) { + Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign, + uint64_t Size, uint32_t ElementSize, MDNode *TBAATag = nullptr, + MDNode *TBAAStructTag = nullptr, MDNode *ScopeTag = nullptr, + MDNode *NoAliasTag = nullptr) { return CreateElementUnorderedAtomicMemCpy( - Dst, Src, getInt64(Size), ElementSize, TBAATag, TBAAStructTag, ScopeTag, - NoAliasTag); + Dst, DstAlign, Src, SrcAlign, getInt64(Size), ElementSize, TBAATag, + TBAAStructTag, ScopeTag, NoAliasTag); } CallInst *CreateElementUnorderedAtomicMemCpy( - Value *Dst, Value *Src, Value *Size, uint32_t ElementSize, - MDNode *TBAATag = nullptr, MDNode *TBAAStructTag = nullptr, - MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr); + Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign, Value *Size, + uint32_t ElementSize, MDNode *TBAATag = nullptr, + MDNode *TBAAStructTag = nullptr, MDNode *ScopeTag = nullptr, + MDNode *NoAliasTag = nullptr); /// \brief Create and insert a memmove between the specified /// pointers. 
@@ -1806,26 +1810,28 @@ public: /// \brief Create an invariant.group.barrier intrinsic call, that stops /// optimizer to propagate equality using invariant.group metadata. - /// If Ptr type is different from i8*, it's casted to i8* before call - /// and casted back to Ptr type after call. + /// If Ptr type is different from pointer to i8, it's casted to pointer to i8 + /// in the same address space before call and casted back to Ptr type after + /// call. Value *CreateInvariantGroupBarrier(Value *Ptr) { + assert(isa<PointerType>(Ptr->getType()) && + "invariant.group.barrier only applies to pointers."); + auto *PtrType = Ptr->getType(); + auto *Int8PtrTy = getInt8PtrTy(PtrType->getPointerAddressSpace()); + if (PtrType != Int8PtrTy) + Ptr = CreateBitCast(Ptr, Int8PtrTy); Module *M = BB->getParent()->getParent(); - Function *FnInvariantGroupBarrier = Intrinsic::getDeclaration(M, - Intrinsic::invariant_group_barrier); - - Type *ArgumentAndReturnType = FnInvariantGroupBarrier->getReturnType(); - assert(ArgumentAndReturnType == - FnInvariantGroupBarrier->getFunctionType()->getParamType(0) && - "InvariantGroupBarrier should take and return the same type"); - Type *PtrType = Ptr->getType(); + Function *FnInvariantGroupBarrier = Intrinsic::getDeclaration( + M, Intrinsic::invariant_group_barrier, {Int8PtrTy}); - bool PtrTypeConversionNeeded = PtrType != ArgumentAndReturnType; - if (PtrTypeConversionNeeded) - Ptr = CreateBitCast(Ptr, ArgumentAndReturnType); + assert(FnInvariantGroupBarrier->getReturnType() == Int8PtrTy && + FnInvariantGroupBarrier->getFunctionType()->getParamType(0) == + Int8PtrTy && + "InvariantGroupBarrier should take and return the same type"); CallInst *Fn = CreateCall(FnInvariantGroupBarrier, {Ptr}); - if (PtrTypeConversionNeeded) + if (PtrType != Int8PtrTy) return CreateBitCast(Fn, PtrType); return Fn; } diff --git a/include/llvm/IR/InlineAsm.h b/include/llvm/IR/InlineAsm.h index 59874b05b0ce..1519a45d59e9 100644 --- a/include/llvm/IR/InlineAsm.h +++ 
b/include/llvm/IR/InlineAsm.h @@ -101,7 +101,7 @@ public: /// input constraint is required to match it (e.g. "0"). The value is the /// constraint number that matches this one (for example, if this is /// constraint #0 and constraint #4 has the value "0", this will be 4). - signed char MatchingInput = -1; + int MatchingInput = -1; /// Code - The constraint code, either the register name (in braces) or the /// constraint letter/number. @@ -128,7 +128,7 @@ public: /// input constraint is required to match it (e.g. "0"). The value is the /// constraint number that matches this one (for example, if this is /// constraint #0 and constraint #4 has the value "0", this will be 4). - signed char MatchingInput = -1; + int MatchingInput = -1; /// hasMatchingInput - Return true if this is an output constraint that has /// a matching input constraint. diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h index d749077fd34a..871f702f95f2 100644 --- a/include/llvm/IR/InstrTypes.h +++ b/include/llvm/IR/InstrTypes.h @@ -775,28 +775,21 @@ public: /// A no-op cast is one that can be effected without changing any bits. /// It implies that the source and destination types are the same size. The - /// IntPtrTy argument is used to make accurate determinations for casts + /// DataLayout argument is to determine the pointer size when examining casts /// involving Integer and Pointer types. They are no-op casts if the integer /// is the same size as the pointer. However, pointer size varies with - /// platform. Generally, the result of DataLayout::getIntPtrType() should be - /// passed in. If that's not available, use Type::Int64Ty, which will make - /// the isNoopCast call conservative. + /// platform. /// @brief Determine if the described cast is a no-op cast. 
static bool isNoopCast( - Instruction::CastOps Opcode, ///< Opcode of cast - Type *SrcTy, ///< SrcTy of cast - Type *DstTy, ///< DstTy of cast - Type *IntPtrTy ///< Integer type corresponding to Ptr types + Instruction::CastOps Opcode, ///< Opcode of cast + Type *SrcTy, ///< SrcTy of cast + Type *DstTy, ///< DstTy of cast + const DataLayout &DL ///< DataLayout to get the Int Ptr type from. ); /// @brief Determine if this cast is a no-op cast. - bool isNoopCast( - Type *IntPtrTy ///< Integer type corresponding to pointer - ) const; - - /// @brief Determine if this cast is a no-op cast. /// - /// \param DL is the DataLayout to get the Int Ptr type from. + /// \param DL is the DataLayout to determine pointer size. bool isNoopCast(const DataLayout &DL) const; /// Determine how a pair of casts can be eliminated, if they can be at all. @@ -1487,6 +1480,12 @@ protected: default: return false; + case Attribute::InaccessibleMemOrArgMemOnly: + return hasReadingOperandBundles(); + + case Attribute::InaccessibleMemOnly: + return hasReadingOperandBundles(); + case Attribute::ArgMemOnly: return hasReadingOperandBundles(); diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h index 8dc02111b866..6af9cbfae5de 100644 --- a/include/llvm/IR/Instruction.h +++ b/include/llvm/IR/Instruction.h @@ -34,6 +34,7 @@ namespace llvm { class BasicBlock; class FastMathFlags; class MDNode; +class Module; struct AAMDNodes; template <> struct ilist_alloc_traits<Instruction> { @@ -113,6 +114,10 @@ public: /// \pre I is a valid iterator into BB. void moveBefore(BasicBlock &BB, SymbolTableList<Instruction>::iterator I); + /// Unlink this instruction from its current basic block and insert it into + /// the basic block that MovePos lives in, right after MovePos. + void moveAfter(Instruction *MovePos); + //===--------------------------------------------------------------------===// // Subclass classification. 
//===--------------------------------------------------------------------===// @@ -304,10 +309,15 @@ public: /// Determine whether the exact flag is set. bool isExact() const; - /// Set or clear the unsafe-algebra flag on this instruction, which must be an + /// Set or clear all fast-math-flags on this instruction, which must be an /// operator which supports this flag. See LangRef.html for the meaning of /// this flag. - void setHasUnsafeAlgebra(bool B); + void setFast(bool B); + + /// Set or clear the reassociation flag on this instruction, which must be + /// an operator which supports this flag. See LangRef.html for the meaning of + /// this flag. + void setHasAllowReassoc(bool B); /// Set or clear the no-nans flag on this instruction, which must be an /// operator which supports this flag. See LangRef.html for the meaning of @@ -329,6 +339,11 @@ public: /// this flag. void setHasAllowReciprocal(bool B); + /// Set or clear the approximate-math-functions flag on this instruction, + /// which must be an operator which supports this flag. See LangRef.html for + /// the meaning of this flag. + void setHasApproxFunc(bool B); + /// Convenience function for setting multiple fast-math flags on this /// instruction, which must be an operator which supports these flags. See /// LangRef.html for the meaning of these flags. @@ -339,8 +354,11 @@ public: /// LangRef.html for the meaning of these flags. void copyFastMathFlags(FastMathFlags FMF); - /// Determine whether the unsafe-algebra flag is set. - bool hasUnsafeAlgebra() const; + /// Determine whether all fast-math-flags are set. + bool isFast() const; + + /// Determine whether the allow-reassociation flag is set. + bool hasAllowReassoc() const; /// Determine whether the no-NaNs flag is set. bool hasNoNaNs() const; @@ -357,6 +375,9 @@ public: /// Determine whether the allow-contract flag is set. bool hasAllowContract() const; + /// Determine whether the approximate-math-functions flag is set. 
+ bool hasApproxFunc() const; + /// Convenience function for getting all the fast-math flags, which must be an /// operator which supports these flags. See LangRef.html for the meaning of /// these flags. @@ -373,6 +394,21 @@ public: /// V and this instruction. void andIRFlags(const Value *V); + /// Merge 2 debug locations and apply it to the Instruction. If the + /// instruction is a CallIns, we need to traverse the inline chain to find + /// the common scope. This is not efficient for N-way merging as each time + /// you merge 2 iterations, you need to rebuild the hashmap to find the + /// common scope. However, we still choose this API because: + /// 1) Simplicity: it takes 2 locations instead of a list of locations. + /// 2) In worst case, it increases the complexity from O(N*I) to + /// O(2*N*I), where N is # of Instructions to merge, and I is the + /// maximum level of inline stack. So it is still linear. + /// 3) Merging of call instructions should be extremely rare in real + /// applications, thus the N-way merging should be in code path. + /// The DebugLoc attached to this instruction will be overwritten by the + /// merged DebugLoc. + void applyMergedLocation(const DILocation *LocA, const DILocation *LocB); + private: /// Return true if we have an entry in the on-the-side metadata hash. bool hasMetadataHashEntry() const { diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h index 60ae98869e55..c1122d137f24 100644 --- a/include/llvm/IR/Instructions.h +++ b/include/llvm/IR/Instructions.h @@ -1757,6 +1757,9 @@ public: !hasFnAttrImpl(Attribute::Builtin); } + /// Determine if the call requires strict floating point semantics. + bool isStrictFP() const { return hasFnAttr(Attribute::StrictFP); } + /// Return true if the call should not be inlined. 
bool isNoInline() const { return hasFnAttr(Attribute::NoInline); } void setIsNoInline() { @@ -1804,6 +1807,24 @@ public: addAttribute(AttributeList::FunctionIndex, Attribute::ArgMemOnly); } + /// @brief Determine if the function may only access memory that is + /// inaccessible from the IR. + bool onlyAccessesInaccessibleMemory() const { + return hasFnAttr(Attribute::InaccessibleMemOnly); + } + void setOnlyAccessesInaccessibleMemory() { + addAttribute(AttributeList::FunctionIndex, Attribute::InaccessibleMemOnly); + } + + /// @brief Determine if the function may only access memory that is + /// either inaccessible from the IR or pointed to by its arguments. + bool onlyAccessesInaccessibleMemOrArgMem() const { + return hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + } + void setOnlyAccessesInaccessibleMemOrArgMem() { + addAttribute(AttributeList::FunctionIndex, Attribute::InaccessibleMemOrArgMemOnly); + } + /// Determine if the call cannot return. bool doesNotReturn() const { return hasFnAttr(Attribute::NoReturn); } void setDoesNotReturn() { @@ -3844,6 +3865,9 @@ public: !hasFnAttrImpl(Attribute::Builtin); } + /// Determine if the call requires strict floating point semantics. + bool isStrictFP() const { return hasFnAttr(Attribute::StrictFP); } + /// Return true if the call should not be inlined. bool isNoInline() const { return hasFnAttr(Attribute::NoInline); } void setIsNoInline() { @@ -3883,6 +3907,24 @@ public: addAttribute(AttributeList::FunctionIndex, Attribute::ArgMemOnly); } + /// @brief Determine if the function may only access memory that is + /// inaccessible from the IR. + bool onlyAccessesInaccessibleMemory() const { + return hasFnAttr(Attribute::InaccessibleMemOnly); + } + void setOnlyAccessesInaccessibleMemory() { + addAttribute(AttributeList::FunctionIndex, Attribute::InaccessibleMemOnly); + } + + /// @brief Determine if the function may only access memory that is + /// either inaccessible from the IR or pointed to by its arguments. 
+ bool onlyAccessesInaccessibleMemOrArgMem() const { + return hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + } + void setOnlyAccessesInaccessibleMemOrArgMem() { + addAttribute(AttributeList::FunctionIndex, Attribute::InaccessibleMemOrArgMemOnly); + } + /// Determine if the call cannot return. bool doesNotReturn() const { return hasFnAttr(Attribute::NoReturn); } void setDoesNotReturn() { @@ -4195,11 +4237,10 @@ private: } public: - using DerefFnTy = std::pointer_to_unary_function<Value *, BasicBlock *>; + using DerefFnTy = BasicBlock *(*)(Value *); using handler_iterator = mapped_iterator<op_iterator, DerefFnTy>; using handler_range = iterator_range<handler_iterator>; - using ConstDerefFnTy = - std::pointer_to_unary_function<const Value *, const BasicBlock *>; + using ConstDerefFnTy = const BasicBlock *(*)(const Value *); using const_handler_iterator = mapped_iterator<const_op_iterator, ConstDerefFnTy>; using const_handler_range = iterator_range<const_handler_iterator>; diff --git a/include/llvm/IR/IntrinsicInst.h b/include/llvm/IR/IntrinsicInst.h index f55d17ec72c8..2ca0a24cbae1 100644 --- a/include/llvm/IR/IntrinsicInst.h +++ b/include/llvm/IR/IntrinsicInst.h @@ -71,11 +71,35 @@ namespace llvm { /// variable's value or its address. Value *getVariableLocation(bool AllowNullOp = true) const; - // Methods for support type inquiry through isa, cast, and dyn_cast: + /// Does this describe the address of a local variable. True for dbg.addr + /// and dbg.declare, but not dbg.value, which describes its value. 
+ bool isAddressOfVariable() const { + return getIntrinsicID() != Intrinsic::dbg_value; + } + + DILocalVariable *getVariable() const { + return cast<DILocalVariable>(getRawVariable()); + } + + DIExpression *getExpression() const { + return cast<DIExpression>(getRawExpression()); + } + + Metadata *getRawVariable() const { + return cast<MetadataAsValue>(getArgOperand(1))->getMetadata(); + } + + Metadata *getRawExpression() const { + return cast<MetadataAsValue>(getArgOperand(2))->getMetadata(); + } + + /// \name Casting methods + /// @{ static bool classof(const IntrinsicInst *I) { switch (I->getIntrinsicID()) { case Intrinsic::dbg_declare: case Intrinsic::dbg_value: + case Intrinsic::dbg_addr: return true; default: return false; } @@ -83,6 +107,7 @@ namespace llvm { static bool classof(const Value *V) { return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); } + /// @} }; /// This represents the llvm.dbg.declare instruction. @@ -90,25 +115,26 @@ namespace llvm { public: Value *getAddress() const { return getVariableLocation(); } - DILocalVariable *getVariable() const { - return cast<DILocalVariable>(getRawVariable()); - } - - DIExpression *getExpression() const { - return cast<DIExpression>(getRawExpression()); + /// \name Casting methods + /// @{ + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::dbg_declare; } - - Metadata *getRawVariable() const { - return cast<MetadataAsValue>(getArgOperand(1))->getMetadata(); + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); } + /// @} + }; - Metadata *getRawExpression() const { - return cast<MetadataAsValue>(getArgOperand(2))->getMetadata(); - } + /// This represents the llvm.dbg.addr instruction. 
+ class DbgAddrIntrinsic : public DbgInfoIntrinsic { + public: + Value *getAddress() const { return getVariableLocation(); } - // Methods for support type inquiry through isa, cast, and dyn_cast: + /// \name Casting methods + /// @{ static bool classof(const IntrinsicInst *I) { - return I->getIntrinsicID() == Intrinsic::dbg_declare; + return I->getIntrinsicID() == Intrinsic::dbg_addr; } static bool classof(const Value *V) { return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); @@ -122,34 +148,15 @@ namespace llvm { return getVariableLocation(/* AllowNullOp = */ false); } - uint64_t getOffset() const { - return cast<ConstantInt>( - const_cast<Value*>(getArgOperand(1)))->getZExtValue(); - } - - DILocalVariable *getVariable() const { - return cast<DILocalVariable>(getRawVariable()); - } - - DIExpression *getExpression() const { - return cast<DIExpression>(getRawExpression()); - } - - Metadata *getRawVariable() const { - return cast<MetadataAsValue>(getArgOperand(2))->getMetadata(); - } - - Metadata *getRawExpression() const { - return cast<MetadataAsValue>(getArgOperand(3))->getMetadata(); - } - - // Methods for support type inquiry through isa, cast, and dyn_cast: + /// \name Casting methods + /// @{ static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::dbg_value; } static bool classof(const Value *V) { return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); } + /// @} }; /// This is the common base class for constrained floating point intrinsics. 
@@ -172,6 +179,7 @@ namespace llvm { }; bool isUnaryOp() const; + bool isTernaryOp() const; RoundingMode getRoundingMode() const; ExceptionBehavior getExceptionBehavior() const; @@ -183,6 +191,7 @@ namespace llvm { case Intrinsic::experimental_constrained_fmul: case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: + case Intrinsic::experimental_constrained_fma: case Intrinsic::experimental_constrained_sqrt: case Intrinsic::experimental_constrained_pow: case Intrinsic::experimental_constrained_powi: @@ -204,12 +213,13 @@ namespace llvm { } }; - /// This class represents atomic memcpy intrinsic - /// TODO: Integrate this class into MemIntrinsic hierarchy; for now this is - /// C&P of all methods from that hierarchy - class ElementUnorderedAtomicMemCpyInst : public IntrinsicInst { + /// Common base class for all memory intrinsics. Simply provides + /// common methods. + /// Written as CRTP to avoid a common base class amongst the + /// three atomicity hierarchies. + template <typename Derived> class MemIntrinsicBase : public IntrinsicInst { private: - enum { ARG_DEST = 0, ARG_SOURCE = 1, ARG_LENGTH = 2, ARG_ELEMENTSIZE = 3 }; + enum { ARG_DEST = 0, ARG_LENGTH = 2 }; public: Value *getRawDest() const { @@ -218,51 +228,21 @@ namespace llvm { const Use &getRawDestUse() const { return getArgOperandUse(ARG_DEST); } Use &getRawDestUse() { return getArgOperandUse(ARG_DEST); } - /// Return the arguments to the instruction. 
- Value *getRawSource() const { - return const_cast<Value *>(getArgOperand(ARG_SOURCE)); - } - const Use &getRawSourceUse() const { return getArgOperandUse(ARG_SOURCE); } - Use &getRawSourceUse() { return getArgOperandUse(ARG_SOURCE); } - Value *getLength() const { return const_cast<Value *>(getArgOperand(ARG_LENGTH)); } const Use &getLengthUse() const { return getArgOperandUse(ARG_LENGTH); } Use &getLengthUse() { return getArgOperandUse(ARG_LENGTH); } - bool isVolatile() const { return false; } - - Value *getRawElementSizeInBytes() const { - return const_cast<Value *>(getArgOperand(ARG_ELEMENTSIZE)); - } - - ConstantInt *getElementSizeInBytesCst() const { - return cast<ConstantInt>(getRawElementSizeInBytes()); - } - - uint32_t getElementSizeInBytes() const { - return getElementSizeInBytesCst()->getZExtValue(); - } - /// This is just like getRawDest, but it strips off any cast - /// instructions that feed it, giving the original input. The returned - /// value is guaranteed to be a pointer. + /// instructions (including addrspacecast) that feed it, giving the + /// original input. The returned value is guaranteed to be a pointer. Value *getDest() const { return getRawDest()->stripPointerCasts(); } - /// This is just like getRawSource, but it strips off any cast - /// instructions that feed it, giving the original input. The returned - /// value is guaranteed to be a pointer. - Value *getSource() const { return getRawSource()->stripPointerCasts(); } - unsigned getDestAddressSpace() const { return cast<PointerType>(getRawDest()->getType())->getAddressSpace(); } - unsigned getSourceAddressSpace() const { - return cast<PointerType>(getRawSource()->getType())->getAddressSpace(); - } - /// Set the specified arguments of the instruction. 
void setDest(Value *Ptr) { assert(getRawDest()->getType() == Ptr->getType() && @@ -270,58 +250,20 @@ namespace llvm { setArgOperand(ARG_DEST, Ptr); } - void setSource(Value *Ptr) { - assert(getRawSource()->getType() == Ptr->getType() && - "setSource called with pointer of wrong type!"); - setArgOperand(ARG_SOURCE, Ptr); - } - void setLength(Value *L) { assert(getLength()->getType() == L->getType() && "setLength called with value of wrong type!"); setArgOperand(ARG_LENGTH, L); } - - void setElementSizeInBytes(Constant *V) { - assert(V->getType() == Type::getInt8Ty(getContext()) && - "setElementSizeInBytes called with value of wrong type!"); - setArgOperand(ARG_ELEMENTSIZE, V); - } - - static bool classof(const IntrinsicInst *I) { - return I->getIntrinsicID() == Intrinsic::memcpy_element_unordered_atomic; - } - static bool classof(const Value *V) { - return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); - } }; - class ElementUnorderedAtomicMemMoveInst : public IntrinsicInst { + // The common base class for the atomic memset/memmove/memcpy intrinsics + // i.e. llvm.element.unordered.atomic.memset/memcpy/memmove + class AtomicMemIntrinsic : public MemIntrinsicBase<AtomicMemIntrinsic> { private: - enum { ARG_DEST = 0, ARG_SOURCE = 1, ARG_LENGTH = 2, ARG_ELEMENTSIZE = 3 }; + enum { ARG_ELEMENTSIZE = 3 }; public: - Value *getRawDest() const { - return const_cast<Value *>(getArgOperand(ARG_DEST)); - } - const Use &getRawDestUse() const { return getArgOperandUse(ARG_DEST); } - Use &getRawDestUse() { return getArgOperandUse(ARG_DEST); } - - /// Return the arguments to the instruction. 
- Value *getRawSource() const { - return const_cast<Value *>(getArgOperand(ARG_SOURCE)); - } - const Use &getRawSourceUse() const { return getArgOperandUse(ARG_SOURCE); } - Use &getRawSourceUse() { return getArgOperandUse(ARG_SOURCE); } - - Value *getLength() const { - return const_cast<Value *>(getArgOperand(ARG_LENGTH)); - } - const Use &getLengthUse() const { return getArgOperandUse(ARG_LENGTH); } - Use &getLengthUse() { return getArgOperandUse(ARG_LENGTH); } - - bool isVolatile() const { return false; } - Value *getRawElementSizeInBytes() const { return const_cast<Value *>(getArgOperand(ARG_ELEMENTSIZE)); } @@ -334,150 +276,129 @@ namespace llvm { return getElementSizeInBytesCst()->getZExtValue(); } - /// This is just like getRawDest, but it strips off any cast - /// instructions that feed it, giving the original input. The returned - /// value is guaranteed to be a pointer. - Value *getDest() const { return getRawDest()->stripPointerCasts(); } - - /// This is just like getRawSource, but it strips off any cast - /// instructions that feed it, giving the original input. The returned - /// value is guaranteed to be a pointer. - Value *getSource() const { return getRawSource()->stripPointerCasts(); } - - unsigned getDestAddressSpace() const { - return cast<PointerType>(getRawDest()->getType())->getAddressSpace(); - } - - unsigned getSourceAddressSpace() const { - return cast<PointerType>(getRawSource()->getType())->getAddressSpace(); - } - - /// Set the specified arguments of the instruction. 
- void setDest(Value *Ptr) { - assert(getRawDest()->getType() == Ptr->getType() && - "setDest called with pointer of wrong type!"); - setArgOperand(ARG_DEST, Ptr); - } - - void setSource(Value *Ptr) { - assert(getRawSource()->getType() == Ptr->getType() && - "setSource called with pointer of wrong type!"); - setArgOperand(ARG_SOURCE, Ptr); - } - - void setLength(Value *L) { - assert(getLength()->getType() == L->getType() && - "setLength called with value of wrong type!"); - setArgOperand(ARG_LENGTH, L); - } - void setElementSizeInBytes(Constant *V) { assert(V->getType() == Type::getInt8Ty(getContext()) && "setElementSizeInBytes called with value of wrong type!"); setArgOperand(ARG_ELEMENTSIZE, V); } - static inline bool classof(const IntrinsicInst *I) { - return I->getIntrinsicID() == Intrinsic::memmove_element_unordered_atomic; + static bool classof(const IntrinsicInst *I) { + switch (I->getIntrinsicID()) { + case Intrinsic::memcpy_element_unordered_atomic: + case Intrinsic::memmove_element_unordered_atomic: + case Intrinsic::memset_element_unordered_atomic: + return true; + default: + return false; + } } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); } }; /// This class represents atomic memset intrinsic - /// TODO: Integrate this class into MemIntrinsic hierarchy; for now this is - /// C&P of all methods from that hierarchy - class ElementUnorderedAtomicMemSetInst : public IntrinsicInst { + // i.e. 
llvm.element.unordered.atomic.memset + class AtomicMemSetInst : public AtomicMemIntrinsic { private: - enum { ARG_DEST = 0, ARG_VALUE = 1, ARG_LENGTH = 2, ARG_ELEMENTSIZE = 3 }; + enum { ARG_VALUE = 1 }; public: - Value *getRawDest() const { - return const_cast<Value *>(getArgOperand(ARG_DEST)); + Value *getValue() const { + return const_cast<Value *>(getArgOperand(ARG_VALUE)); } - const Use &getRawDestUse() const { return getArgOperandUse(ARG_DEST); } - Use &getRawDestUse() { return getArgOperandUse(ARG_DEST); } - - Value *getValue() const { return const_cast<Value*>(getArgOperand(ARG_VALUE)); } const Use &getValueUse() const { return getArgOperandUse(ARG_VALUE); } Use &getValueUse() { return getArgOperandUse(ARG_VALUE); } - Value *getLength() const { - return const_cast<Value *>(getArgOperand(ARG_LENGTH)); + void setValue(Value *Val) { + assert(getValue()->getType() == Val->getType() && + "setValue called with value of wrong type!"); + setArgOperand(ARG_VALUE, Val); } - const Use &getLengthUse() const { return getArgOperandUse(ARG_LENGTH); } - Use &getLengthUse() { return getArgOperandUse(ARG_LENGTH); } - - bool isVolatile() const { return false; } - Value *getRawElementSizeInBytes() const { - return const_cast<Value *>(getArgOperand(ARG_ELEMENTSIZE)); + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::memset_element_unordered_atomic; } - - ConstantInt *getElementSizeInBytesCst() const { - return cast<ConstantInt>(getRawElementSizeInBytes()); + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); } + }; - uint32_t getElementSizeInBytes() const { - return getElementSizeInBytesCst()->getZExtValue(); + // This class wraps the atomic memcpy/memmove intrinsics + // i.e. llvm.element.unordered.atomic.memcpy/memmove + class AtomicMemTransferInst : public AtomicMemIntrinsic { + private: + enum { ARG_SOURCE = 1 }; + + public: + /// Return the arguments to the instruction. 
+ Value *getRawSource() const { + return const_cast<Value *>(getArgOperand(ARG_SOURCE)); } + const Use &getRawSourceUse() const { return getArgOperandUse(ARG_SOURCE); } + Use &getRawSourceUse() { return getArgOperandUse(ARG_SOURCE); } - /// This is just like getRawDest, but it strips off any cast + /// This is just like getRawSource, but it strips off any cast /// instructions that feed it, giving the original input. The returned /// value is guaranteed to be a pointer. - Value *getDest() const { return getRawDest()->stripPointerCasts(); } + Value *getSource() const { return getRawSource()->stripPointerCasts(); } - unsigned getDestAddressSpace() const { - return cast<PointerType>(getRawDest()->getType())->getAddressSpace(); + unsigned getSourceAddressSpace() const { + return cast<PointerType>(getRawSource()->getType())->getAddressSpace(); } - /// Set the specified arguments of the instruction. - void setDest(Value *Ptr) { - assert(getRawDest()->getType() == Ptr->getType() && - "setDest called with pointer of wrong type!"); - setArgOperand(ARG_DEST, Ptr); + void setSource(Value *Ptr) { + assert(getRawSource()->getType() == Ptr->getType() && + "setSource called with pointer of wrong type!"); + setArgOperand(ARG_SOURCE, Ptr); } - void setValue(Value *Val) { - assert(getValue()->getType() == Val->getType() && - "setValue called with value of wrong type!"); - setArgOperand(ARG_VALUE, Val); + static bool classof(const IntrinsicInst *I) { + switch (I->getIntrinsicID()) { + case Intrinsic::memcpy_element_unordered_atomic: + case Intrinsic::memmove_element_unordered_atomic: + return true; + default: + return false; + } } - - void setLength(Value *L) { - assert(getLength()->getType() == L->getType() && - "setLength called with value of wrong type!"); - setArgOperand(ARG_LENGTH, L); + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); } + }; - void setElementSizeInBytes(Constant *V) { - assert(V->getType() == 
Type::getInt8Ty(getContext()) && - "setElementSizeInBytes called with value of wrong type!"); - setArgOperand(ARG_ELEMENTSIZE, V); + /// This class represents the atomic memcpy intrinsic + /// i.e. llvm.element.unordered.atomic.memcpy + class AtomicMemCpyInst : public AtomicMemTransferInst { + public: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::memcpy_element_unordered_atomic; + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); } + }; - static inline bool classof(const IntrinsicInst *I) { - return I->getIntrinsicID() == Intrinsic::memset_element_unordered_atomic; + /// This class represents the atomic memmove intrinsic + /// i.e. llvm.element.unordered.atomic.memmove + class AtomicMemMoveInst : public AtomicMemTransferInst { + public: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::memmove_element_unordered_atomic; } - static inline bool classof(const Value *V) { + static bool classof(const Value *V) { return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); } }; /// This is the common base class for memset/memcpy/memmove. 
- class MemIntrinsic : public IntrinsicInst { - public: - Value *getRawDest() const { return const_cast<Value*>(getArgOperand(0)); } - const Use &getRawDestUse() const { return getArgOperandUse(0); } - Use &getRawDestUse() { return getArgOperandUse(0); } - - Value *getLength() const { return const_cast<Value*>(getArgOperand(2)); } - const Use &getLengthUse() const { return getArgOperandUse(2); } - Use &getLengthUse() { return getArgOperandUse(2); } + class MemIntrinsic : public MemIntrinsicBase<MemIntrinsic> { + private: + enum { ARG_ALIGN = 3, ARG_VOLATILE = 4 }; + public: ConstantInt *getAlignmentCst() const { - return cast<ConstantInt>(const_cast<Value*>(getArgOperand(3))); + return cast<ConstantInt>(const_cast<Value *>(getArgOperand(ARG_ALIGN))); } unsigned getAlignment() const { @@ -485,45 +406,20 @@ namespace llvm { } ConstantInt *getVolatileCst() const { - return cast<ConstantInt>(const_cast<Value*>(getArgOperand(4))); + return cast<ConstantInt>( + const_cast<Value *>(getArgOperand(ARG_VOLATILE))); } bool isVolatile() const { return !getVolatileCst()->isZero(); } - unsigned getDestAddressSpace() const { - return cast<PointerType>(getRawDest()->getType())->getAddressSpace(); - } - - /// This is just like getRawDest, but it strips off any cast - /// instructions that feed it, giving the original input. The returned - /// value is guaranteed to be a pointer. - Value *getDest() const { return getRawDest()->stripPointerCasts(); } - - /// Set the specified arguments of the instruction. 
- void setDest(Value *Ptr) { - assert(getRawDest()->getType() == Ptr->getType() && - "setDest called with pointer of wrong type!"); - setArgOperand(0, Ptr); - } - - void setLength(Value *L) { - assert(getLength()->getType() == L->getType() && - "setLength called with value of wrong type!"); - setArgOperand(2, L); - } - - void setAlignment(Constant* A) { - setArgOperand(3, A); - } + void setAlignment(Constant *A) { setArgOperand(ARG_ALIGN, A); } - void setVolatile(Constant* V) { - setArgOperand(4, V); - } + void setVolatile(Constant *V) { setArgOperand(ARG_VOLATILE, V); } Type *getAlignmentType() const { - return getArgOperand(3)->getType(); + return getArgOperand(ARG_ALIGN)->getType(); } // Methods for support type inquiry through isa, cast, and dyn_cast: @@ -621,6 +517,155 @@ namespace llvm { } }; + // The common base class for any memset/memmove/memcpy intrinsics; + // whether they be atomic or non-atomic. + // i.e. llvm.element.unordered.atomic.memset/memcpy/memmove + // and llvm.memset/memcpy/memmove + class AnyMemIntrinsic : public MemIntrinsicBase<AnyMemIntrinsic> { + public: + bool isVolatile() const { + // Only the non-atomic intrinsics can be volatile + if (auto *MI = dyn_cast<MemIntrinsic>(this)) + return MI->isVolatile(); + return false; + } + + static bool classof(const IntrinsicInst *I) { + switch (I->getIntrinsicID()) { + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::memset: + case Intrinsic::memcpy_element_unordered_atomic: + case Intrinsic::memmove_element_unordered_atomic: + case Intrinsic::memset_element_unordered_atomic: + return true; + default: + return false; + } + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } + }; + + /// This class represents any memset intrinsic + // i.e. 
llvm.element.unordered.atomic.memset + // and llvm.memset + class AnyMemSetInst : public AnyMemIntrinsic { + private: + enum { ARG_VALUE = 1 }; + + public: + Value *getValue() const { + return const_cast<Value *>(getArgOperand(ARG_VALUE)); + } + const Use &getValueUse() const { return getArgOperandUse(ARG_VALUE); } + Use &getValueUse() { return getArgOperandUse(ARG_VALUE); } + + void setValue(Value *Val) { + assert(getValue()->getType() == Val->getType() && + "setValue called with value of wrong type!"); + setArgOperand(ARG_VALUE, Val); + } + + static bool classof(const IntrinsicInst *I) { + switch (I->getIntrinsicID()) { + case Intrinsic::memset: + case Intrinsic::memset_element_unordered_atomic: + return true; + default: + return false; + } + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } + }; + + // This class wraps any memcpy/memmove intrinsics + // i.e. llvm.element.unordered.atomic.memcpy/memmove + // and llvm.memcpy/memmove + class AnyMemTransferInst : public AnyMemIntrinsic { + private: + enum { ARG_SOURCE = 1 }; + + public: + /// Return the arguments to the instruction. + Value *getRawSource() const { + return const_cast<Value *>(getArgOperand(ARG_SOURCE)); + } + const Use &getRawSourceUse() const { return getArgOperandUse(ARG_SOURCE); } + Use &getRawSourceUse() { return getArgOperandUse(ARG_SOURCE); } + + /// This is just like getRawSource, but it strips off any cast + /// instructions that feed it, giving the original input. The returned + /// value is guaranteed to be a pointer. 
+ Value *getSource() const { return getRawSource()->stripPointerCasts(); } + + unsigned getSourceAddressSpace() const { + return cast<PointerType>(getRawSource()->getType())->getAddressSpace(); + } + + void setSource(Value *Ptr) { + assert(getRawSource()->getType() == Ptr->getType() && + "setSource called with pointer of wrong type!"); + setArgOperand(ARG_SOURCE, Ptr); + } + + static bool classof(const IntrinsicInst *I) { + switch (I->getIntrinsicID()) { + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::memcpy_element_unordered_atomic: + case Intrinsic::memmove_element_unordered_atomic: + return true; + default: + return false; + } + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } + }; + + /// This class represents any memcpy intrinsic + /// i.e. llvm.element.unordered.atomic.memcpy + /// and llvm.memcpy + class AnyMemCpyInst : public AnyMemTransferInst { + public: + static bool classof(const IntrinsicInst *I) { + switch (I->getIntrinsicID()) { + case Intrinsic::memcpy: + case Intrinsic::memcpy_element_unordered_atomic: + return true; + default: + return false; + } + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } + }; + + /// This class represents any memmove intrinsic + /// i.e. llvm.element.unordered.atomic.memmove + /// and llvm.memmove + class AnyMemMoveInst : public AnyMemTransferInst { + public: + static bool classof(const IntrinsicInst *I) { + switch (I->getIntrinsicID()) { + case Intrinsic::memmove: + case Intrinsic::memmove_element_unordered_atomic: + return true; + default: + return false; + } + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } + }; + /// This represents the llvm.va_start intrinsic. 
class VAStartInst : public IntrinsicInst { public: diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index 14c88e519435..07de0568cab0 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -490,6 +490,13 @@ let IntrProperties = [IntrInaccessibleMemOnly] in { llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_fma : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + // These intrinsics are sensitive to the rounding mode so we need constrained // versions of each of them. When strict rounding and exception control are // not required the non-constrained versions of these intrinsics should be @@ -576,10 +583,14 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in { let IntrProperties = [IntrNoMem, IntrSpeculatable] in { def int_dbg_declare : Intrinsic<[], [llvm_metadata_ty, - llvm_metadata_ty, - llvm_metadata_ty]>; + llvm_metadata_ty, + llvm_metadata_ty]>; def int_dbg_value : Intrinsic<[], - [llvm_metadata_ty, llvm_i64_ty, + [llvm_metadata_ty, + llvm_metadata_ty, + llvm_metadata_ty]>; + def int_dbg_addr : Intrinsic<[], + [llvm_metadata_ty, llvm_metadata_ty, llvm_metadata_ty]>; } @@ -634,6 +645,13 @@ def int_annotation : Intrinsic<[llvm_anyint_ty], llvm_ptr_ty, llvm_i32_ty], [], "llvm.annotation">; +// Annotates the current program point with metadata strings which are emitted +// as CodeView debug info records. This is expensive, as it disables inlining +// and is modelled as having side effects. +def int_codeview_annotation : Intrinsic<[], [llvm_metadata_ty], + [IntrInaccessibleMemOnly, IntrNoDuplicate], + "llvm.codeview.annotation">; + //===------------------------ Trampoline Intrinsics -----------------------===// // def int_init_trampoline : Intrinsic<[], @@ -693,8 +711,8 @@ def int_invariant_end : Intrinsic<[], // which is valid. 
// The argument also can't be marked with 'returned' attribute, because // it would remove barrier. -def int_invariant_group_barrier : Intrinsic<[llvm_ptr_ty], - [llvm_ptr_ty], +def int_invariant_group_barrier : Intrinsic<[llvm_anyptr_ty], + [LLVMMatchType<0>], [IntrReadMem, IntrArgMemOnly]>; //===------------------------ Stackmap Intrinsics -------------------------===// @@ -792,6 +810,12 @@ def int_experimental_guard : Intrinsic<[], [llvm_i1_ty, llvm_vararg_ty], // NOP: calls/invokes to this intrinsic are removed by codegen def int_donothing : Intrinsic<[], [], [IntrNoMem]>; +// This instruction has no actual effect, though it is treated by the optimizer +// has having opaque side effects. This may be inserted into loops to ensure +// that they are not removed even if they turn out to be empty, for languages +// which specify that infinite loops must be preserved. +def int_sideeffect : Intrinsic<[], [], [IntrInaccessibleMemOnly]>; + // Intrisics to support half precision floating point format let IntrProperties = [IntrNoMem] in { def int_convert_to_fp16 : Intrinsic<[llvm_i16_ty], [llvm_anyfloat_ty]>; diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td index 2c45d148e34b..65c9aaab975d 100644 --- a/include/llvm/IR/IntrinsicsAArch64.td +++ b/include/llvm/IR/IntrinsicsAArch64.td @@ -40,9 +40,9 @@ def int_aarch64_hint : Intrinsic<[], [llvm_i32_ty]>; //===----------------------------------------------------------------------===// // Data Barrier Instructions -def int_aarch64_dmb : GCCBuiltin<"__builtin_arm_dmb">, Intrinsic<[], [llvm_i32_ty]>; -def int_aarch64_dsb : GCCBuiltin<"__builtin_arm_dsb">, Intrinsic<[], [llvm_i32_ty]>; -def int_aarch64_isb : GCCBuiltin<"__builtin_arm_isb">, Intrinsic<[], [llvm_i32_ty]>; +def int_aarch64_dmb : GCCBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">, Intrinsic<[], [llvm_i32_ty]>; +def int_aarch64_dsb : GCCBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">, Intrinsic<[], [llvm_i32_ty]>; +def 
int_aarch64_isb : GCCBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">, Intrinsic<[], [llvm_i32_ty]>; } diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td index 4e0529a32d29..d7999cd33231 100644 --- a/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/include/llvm/IR/IntrinsicsAMDGPU.td @@ -294,7 +294,7 @@ class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty], def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin; def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin; -class AMDGPUImageLoad : Intrinsic < +class AMDGPUImageLoad<bit NoMem = 0> : Intrinsic < [llvm_anyfloat_ty], // vdata(VGPR) [llvm_anyint_ty, // vaddr(VGPR) llvm_anyint_ty, // rsrc(SGPR) @@ -303,11 +303,11 @@ class AMDGPUImageLoad : Intrinsic < llvm_i1_ty, // slc(imm) llvm_i1_ty, // lwe(imm) llvm_i1_ty], // da(imm) - [IntrReadMem]>; + !if(NoMem, [IntrNoMem], [IntrReadMem])>; def int_amdgcn_image_load : AMDGPUImageLoad; def int_amdgcn_image_load_mip : AMDGPUImageLoad; -def int_amdgcn_image_getresinfo : AMDGPUImageLoad; +def int_amdgcn_image_getresinfo : AMDGPUImageLoad<1>; class AMDGPUImageStore : Intrinsic < [], @@ -324,7 +324,7 @@ class AMDGPUImageStore : Intrinsic < def int_amdgcn_image_store : AMDGPUImageStore; def int_amdgcn_image_store_mip : AMDGPUImageStore; -class AMDGPUImageSample : Intrinsic < +class AMDGPUImageSample<bit NoMem = 0> : Intrinsic < [llvm_anyfloat_ty], // vdata(VGPR) [llvm_anyfloat_ty, // vaddr(VGPR) llvm_anyint_ty, // rsrc(SGPR) @@ -335,7 +335,7 @@ class AMDGPUImageSample : Intrinsic < llvm_i1_ty, // slc(imm) llvm_i1_ty, // lwe(imm) llvm_i1_ty], // da(imm) - [IntrReadMem]>; + !if(NoMem, [IntrNoMem], [IntrReadMem])>; // Basic sample def int_amdgcn_image_sample : AMDGPUImageSample; @@ -417,7 +417,7 @@ def int_amdgcn_image_gather4_c_b_o : AMDGPUImageSample; def int_amdgcn_image_gather4_c_b_cl_o : AMDGPUImageSample; def int_amdgcn_image_gather4_c_lz_o : AMDGPUImageSample; -def int_amdgcn_image_getlod : AMDGPUImageSample; +def int_amdgcn_image_getlod : 
AMDGPUImageSample<1>; class AMDGPUImageAtomic : Intrinsic < [llvm_i32_ty], @@ -570,7 +570,7 @@ def int_amdgcn_s_dcache_inv : def int_amdgcn_s_memtime : GCCBuiltin<"__builtin_amdgcn_s_memtime">, - Intrinsic<[llvm_i64_ty], [], []>; + Intrinsic<[llvm_i64_ty], [], [IntrReadMem]>; def int_amdgcn_s_sleep : GCCBuiltin<"__builtin_amdgcn_s_sleep">, @@ -740,6 +740,41 @@ def int_amdgcn_alignbyte : Intrinsic<[llvm_i32_ty], [IntrNoMem, IntrSpeculatable] >; + +// Copies the source value to the destination value, with the guarantee that +// the source value is computed as if the entire program were executed in WQM. +def int_amdgcn_wqm : Intrinsic<[llvm_any_ty], + [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] +>; + +// Return true if at least one thread within the pixel quad passes true into +// the function. +def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty], + [llvm_i1_ty], [IntrNoMem, IntrConvergent] +>; + +// If false, set EXEC=0 for the current thread until the end of program. +def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], []>; + +// Copies the active channels of the source value to the destination value, +// with the guarantee that the source value is computed as if the entire +// program were executed in Whole Wavefront Mode, i.e. with all channels +// enabled, with a few exceptions: - Phi nodes with require WWM return an +// undefined value. +def int_amdgcn_wwm : Intrinsic<[llvm_any_ty], + [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] +>; + +// Given a value, copies it while setting all the inactive lanes to a given +// value. Note that OpenGL helper lanes are considered active, so if the +// program ever uses WQM, then the instruction and the first source will be +// computed in WQM. 
+def int_amdgcn_set_inactive : + Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, // value to be copied + LLVMMatchType<0>], // value for the inactive lanes to take + [IntrNoMem, IntrConvergent]>; + //===----------------------------------------------------------------------===// // CI+ Intrinsics //===----------------------------------------------------------------------===// @@ -762,6 +797,15 @@ def int_amdgcn_mov_dpp : [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent]>; +// llvm.amdgcn.update.dpp.i32 <old> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl> +// Should be equivalent to: +// v_mov_b32 <dest> <old> +// v_mov_b32 <dest> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl> +def int_amdgcn_update_dpp : + Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent]>; + def int_amdgcn_s_dcache_wb : GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">, Intrinsic<[], [], []>; @@ -772,7 +816,7 @@ def int_amdgcn_s_dcache_wb_vol : def int_amdgcn_s_memrealtime : GCCBuiltin<"__builtin_amdgcn_s_memrealtime">, - Intrinsic<[llvm_i64_ty], [], []>; + Intrinsic<[llvm_i64_ty], [], [IntrReadMem]>; // llvm.amdgcn.ds.permute <index> <src> def int_amdgcn_ds_permute : diff --git a/include/llvm/IR/IntrinsicsHexagon.td b/include/llvm/IR/IntrinsicsHexagon.td index 098245344725..5c96702bca76 100644 --- a/include/llvm/IR/IntrinsicsHexagon.td +++ b/include/llvm/IR/IntrinsicsHexagon.td @@ -5044,7 +5044,6 @@ def int_hexagon_V6_vassignp_128B : Hexagon_v2048v2048_Intrinsic_T<"HEXAGON_V6_vassignp_128B">; - // // Hexagon_iii_Intrinsic<string GCCIntSuffix> // tag : S6_rol_i_r @@ -5583,54 +5582,6 @@ class Hexagon_v1024i_Intrinsic<string GCCIntSuffix> [IntrNoMem]>; // -// Hexagon_v512v512LLii_Intrinsic<string GCCIntSuffix> -// tag : V6_vlutb -class Hexagon_v512v512LLii_Intrinsic<string GCCIntSuffix> - : Hexagon_Intrinsic<GCCIntSuffix, - [llvm_v16i32_ty], 
[llvm_v16i32_ty,llvm_i64_ty,llvm_i32_ty], - [IntrNoMem]>; - -// -// Hexagon_v1024v1024LLii_Intrinsic<string GCCIntSuffix> -// tag : V6_vlutb_128B -class Hexagon_v1024v1024LLii_Intrinsic<string GCCIntSuffix> - : Hexagon_Intrinsic<GCCIntSuffix, - [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i64_ty,llvm_i32_ty], - [IntrNoMem]>; - -// -// Hexagon_v512v512v512LLii_Intrinsic<string GCCIntSuffix> -// tag : V6_vlutb_acc -class Hexagon_v512v512v512LLii_Intrinsic<string GCCIntSuffix> - : Hexagon_Intrinsic<GCCIntSuffix, - [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i64_ty,llvm_i32_ty], - [IntrNoMem]>; - -// -// Hexagon_v1024v1024v1024LLii_Intrinsic<string GCCIntSuffix> -// tag : V6_vlutb_acc_128B -class Hexagon_v1024v1024v1024LLii_Intrinsic<string GCCIntSuffix> - : Hexagon_Intrinsic<GCCIntSuffix, - [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i64_ty,llvm_i32_ty], - [IntrNoMem]>; - -// -// Hexagon_v2048v2048LLii_Intrinsic<string GCCIntSuffix> -// tag : V6_vlutb_dv_128B -class Hexagon_v2048v2048LLii_Intrinsic<string GCCIntSuffix> - : Hexagon_Intrinsic<GCCIntSuffix, - [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_i64_ty,llvm_i32_ty], - [IntrNoMem]>; - -// -// Hexagon_v2048v2048v2048LLii_Intrinsic<string GCCIntSuffix> -// tag : V6_vlutb_dv_acc_128B -class Hexagon_v2048v2048v2048LLii_Intrinsic<string GCCIntSuffix> - : Hexagon_Intrinsic<GCCIntSuffix, - [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty,llvm_i64_ty,llvm_i32_ty], - [IntrNoMem]>; - -// // Hexagon_v512v512v512v512i_Intrinsic<string GCCIntSuffix> // tag : V6_vlutvvb_oracc class Hexagon_v512v512v512v512i_Intrinsic<string GCCIntSuffix> @@ -9167,54 +9118,6 @@ def int_hexagon_V6_vcombine_128B : Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vcombine_128B">; // -// BUILTIN_INFO(HEXAGON.V6_vlutb,VI_ftype_VIDISI,3) -// tag : V6_vlutb -def int_hexagon_V6_vlutb : -Hexagon_v512v512LLii_Intrinsic<"HEXAGON_V6_vlutb">; - -// -// BUILTIN_INFO(HEXAGON.V6_vlutb_128B,VI_ftype_VIDISI,3) -// tag : V6_vlutb_128B -def 
int_hexagon_V6_vlutb_128B : -Hexagon_v1024v1024LLii_Intrinsic<"HEXAGON_V6_vlutb_128B">; - -// -// BUILTIN_INFO(HEXAGON.V6_vlutb_acc,VI_ftype_VIVIDISI,4) -// tag : V6_vlutb_acc -def int_hexagon_V6_vlutb_acc : -Hexagon_v512v512v512LLii_Intrinsic<"HEXAGON_V6_vlutb_acc">; - -// -// BUILTIN_INFO(HEXAGON.V6_vlutb_acc_128B,VI_ftype_VIVIDISI,4) -// tag : V6_vlutb_acc_128B -def int_hexagon_V6_vlutb_acc_128B : -Hexagon_v1024v1024v1024LLii_Intrinsic<"HEXAGON_V6_vlutb_acc_128B">; - -// -// BUILTIN_INFO(HEXAGON.V6_vlutb_dv,VD_ftype_VDDISI,3) -// tag : V6_vlutb_dv -def int_hexagon_V6_vlutb_dv : -Hexagon_v1024v1024LLii_Intrinsic<"HEXAGON_V6_vlutb_dv">; - -// -// BUILTIN_INFO(HEXAGON.V6_vlutb_dv_128B,VD_ftype_VDDISI,3) -// tag : V6_vlutb_dv_128B -def int_hexagon_V6_vlutb_dv_128B : -Hexagon_v2048v2048LLii_Intrinsic<"HEXAGON_V6_vlutb_dv_128B">; - -// -// BUILTIN_INFO(HEXAGON.V6_vlutb_dv_acc,VD_ftype_VDVDDISI,4) -// tag : V6_vlutb_dv_acc -def int_hexagon_V6_vlutb_dv_acc : -Hexagon_v1024v1024v1024LLii_Intrinsic<"HEXAGON_V6_vlutb_dv_acc">; - -// -// BUILTIN_INFO(HEXAGON.V6_vlutb_dv_acc_128B,VD_ftype_VDVDDISI,4) -// tag : V6_vlutb_dv_acc_128B -def int_hexagon_V6_vlutb_dv_acc_128B : -Hexagon_v2048v2048v2048LLii_Intrinsic<"HEXAGON_V6_vlutb_dv_acc_128B">; - -// // BUILTIN_INFO(HEXAGON.V6_vdelta,VI_ftype_VIVI,2) // tag : V6_vdelta def int_hexagon_V6_vdelta : @@ -9349,6 +9252,30 @@ Hexagon_v2048v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvwh_oracc_128B">; // // Masked vector stores // +def int_hexagon_V6_vS32b_qpred_ai : +Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_qpred_ai">; + +def int_hexagon_V6_vS32b_nqpred_ai : +Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_nqpred_ai">; + +def int_hexagon_V6_vS32b_nt_qpred_ai : +Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_nt_qpred_ai">; + +def int_hexagon_V6_vS32b_nt_nqpred_ai : +Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_nt_nqpred_ai">; + +def int_hexagon_V6_vS32b_qpred_ai_128B : 
+Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_qpred_ai_128B">; + +def int_hexagon_V6_vS32b_nqpred_ai_128B : +Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_nqpred_ai_128B">; + +def int_hexagon_V6_vS32b_nt_qpred_ai_128B : +Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_nt_qpred_ai_128B">; + +def int_hexagon_V6_vS32b_nt_nqpred_ai_128B : +Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_nt_nqpred_ai_128B">; + def int_hexagon_V6_vmaskedstoreq : Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstoreq">; @@ -9642,6 +9569,20 @@ class Hexagon_V62_v2048v2048v1024v1024i_Intrinsic<string GCCIntSuffix> [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty], [IntrNoMem]>; +// Hexagon_v512v64iv512v512v64i_Intrinsic<string GCCIntSuffix> +// tag: V6_vaddcarry +class Hexagon_v512v64iv512v512v64i_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v16i32_ty, llvm_v512i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v512i1_ty], + [IntrNoMem]>; + +// Hexagon_v1024v128iv1024v1024v128i_Intrinsic<string GCCIntSuffix> +// tag: V6_vaddcarry_128B +class Hexagon_v1024v128iv1024v1024v128i_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v32i32_ty, llvm_v1024i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v1024i1_ty], + [IntrNoMem]>; + // // BUILTIN_INFO(HEXAGON.M6_vabsdiffb,DI_ftype_DIDI,2) @@ -10213,3 +10154,821 @@ Hexagon_V62_v1024v512v512i_Intrinsic<"HEXAGON_V6_vlutvwh_nm">; def int_hexagon_V6_vlutvwh_nm_128B : Hexagon_V62_v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvwh_nm_128B">; +// +// BUILTIN_INFO(HEXAGON.V6_vaddcarry,VI_ftype_VIVIQV,3) +// tag: V6_vaddcarry +def int_hexagon_V6_vaddcarry : +Hexagon_v512v64iv512v512v64i_Intrinsic<"HEXAGON_v6_vaddcarry">; + +// +// BUILTIN_INFO(HEXAGON.V6_vaddcarry_128B,VI_ftype_VIVIQV,3) +// tag: V6_vaddcarry_128B +def int_hexagon_V6_vaddcarry_128B : +Hexagon_v1024v128iv1024v1024v128i_Intrinsic<"HEXAGON_v6_vaddcarry_128B">; + +// +// 
BUILTIN_INFO(HEXAGON.V6_vsubcarry,VI_ftype_VIVIQV,3) +// tag: V6_vsubcarry +def int_hexagon_V6_vsubcarry : +Hexagon_v512v64iv512v512v64i_Intrinsic<"HEXAGON_v6_vsubcarry">; + +// +// BUILTIN_INFO(HEXAGON.V6_vsubcarry_128B,VI_ftype_VIVIQV,3) +// tag: V6_vsubcarry_128B +def int_hexagon_V6_vsubcarry_128B : +Hexagon_v1024v128iv1024v1024v128i_Intrinsic<"HEXAGON_v6_vsubcarry_128B">; + + +/// +/// HexagonV65 intrinsics +/// + +// +// Hexagon_V65_iLLiLLi_Intrinsic<string GCCIntSuffix> +// tag : A6_vcmpbeq_notany +class Hexagon_V65_iLLiLLi_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_i32_ty], [llvm_i64_ty,llvm_i64_ty], + [IntrNoMem]>; + +// +// Hexagon_V65_v1024v512LLi_Intrinsic<string GCCIntSuffix> +// tag : V6_vrmpyub_rtt +class Hexagon_V65_v1024v512LLi_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_i64_ty], + [IntrNoMem]>; + +// +// Hexagon_V65_v2048v1024LLi_Intrinsic<string GCCIntSuffix> +// tag : V6_vrmpyub_rtt_128B +class Hexagon_V65_v2048v1024LLi_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_i64_ty], + [IntrNoMem]>; + +// +// Hexagon_V65_v1024v1024v512LLi_Intrinsic<string GCCIntSuffix> +// tag : V6_vrmpyub_rtt_acc +class Hexagon_V65_v1024v1024v512LLi_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_i64_ty], + [IntrNoMem]>; + +// +// Hexagon_V65_v2048v2048v1024LLi_Intrinsic<string GCCIntSuffix> +// tag : V6_vrmpyub_rtt_acc_128B +class Hexagon_V65_v2048v2048v1024LLi_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_i64_ty], + [IntrNoMem]>; + +// +// Hexagon_V65_v512v512v512i_Intrinsic<string GCCIntSuffix> +// tag : V6_vasruwuhsat +class Hexagon_V65_v512v512v512i_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v16i32_ty], 
[llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty], + [IntrNoMem]>; + +// +// Hexagon_V65_v1024v1024v1024i_Intrinsic<string GCCIntSuffix> +// tag : V6_vasruwuhsat_128B +class Hexagon_V65_v1024v1024v1024i_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty], + [IntrNoMem]>; + +// +// Hexagon_V65_v512v512v512_Intrinsic<string GCCIntSuffix> +// tag : V6_vavguw +class Hexagon_V65_v512v512v512_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty], + [IntrNoMem]>; + +// +// Hexagon_V65_v1024v1024v1024_Intrinsic<string GCCIntSuffix> +// tag : V6_vavguw_128B +class Hexagon_V65_v1024v1024v1024_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty], + [IntrNoMem]>; + +// +// Hexagon_V65_v512v512_Intrinsic<string GCCIntSuffix> +// tag : V6_vabsb +class Hexagon_V65_v512v512_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v16i32_ty], [llvm_v16i32_ty], + [IntrNoMem]>; + +// +// Hexagon_V65_v1024v1024_Intrinsic<string GCCIntSuffix> +// tag : V6_vabsb_128B +class Hexagon_V65_v1024v1024_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v32i32_ty], [llvm_v32i32_ty], + [IntrNoMem]>; + +// +// Hexagon_V65_v1024v1024i_Intrinsic<string GCCIntSuffix> +// tag : V6_vmpabuu +class Hexagon_V65_v1024v1024i_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i32_ty], + [IntrNoMem]>; + +// +// Hexagon_V65_v2048v2048i_Intrinsic<string GCCIntSuffix> +// tag : V6_vmpabuu_128B +class Hexagon_V65_v2048v2048i_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_i32_ty], + [IntrNoMem]>; + +// +// Hexagon_V65_v2048v2048v2048i_Intrinsic<string GCCIntSuffix> +// tag : V6_vmpabuu_acc_128B +class Hexagon_V65_v2048v2048v2048i_Intrinsic<string 
GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty,llvm_i32_ty], + [IntrNoMem]>; + +// +// Hexagon_V65_v1024v1024v512i_Intrinsic<string GCCIntSuffix> +// tag : V6_vmpyh_acc +class Hexagon_V65_v1024v1024v512i_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_i32_ty], + [IntrNoMem]>; + +// +// Hexagon_V65_v2048v2048v1024i_Intrinsic<string GCCIntSuffix> +// tag : V6_vmpyh_acc_128B +class Hexagon_V65_v2048v2048v1024i_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_i32_ty], + [IntrNoMem]>; + +// +// Hexagon_V65_v512v512v512LLi_Intrinsic<string GCCIntSuffix> +// tag : V6_vmpahhsat +class Hexagon_V65_v512v512v512LLi_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i64_ty], + [IntrNoMem]>; + +// +// Hexagon_V65_v1024v1024v1024LLi_Intrinsic<string GCCIntSuffix> +// tag : V6_vmpahhsat_128B +class Hexagon_V65_v1024v1024v1024LLi_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i64_ty], + [IntrNoMem]>; + +// +// Hexagon_V65_v512v512LLi_Intrinsic<string GCCIntSuffix> +// tag : V6_vlut4 +class Hexagon_V65_v512v512LLi_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_i64_ty], + [IntrNoMem]>; + +// +// Hexagon_V65_v1024v1024LLi_Intrinsic<string GCCIntSuffix> +// tag : V6_vlut4_128B +class Hexagon_V65_v1024v1024LLi_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i64_ty], + [IntrNoMem]>; + +// +// Hexagon_V65_v512v512i_Intrinsic<string GCCIntSuffix> +// tag : V6_vmpyuhe +class Hexagon_V65_v512v512i_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_i32_ty], + [IntrNoMem]>; 
+ +// +// Hexagon_V65_v512v64i_Intrinsic<string GCCIntSuffix> +// tag : V6_vprefixqb +class Hexagon_V65_v512v64i_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v16i32_ty], [llvm_v512i1_ty], + [IntrNoMem]>; + +// +// Hexagon_V65_v1024v128i_Intrinsic<string GCCIntSuffix> +// tag : V6_vprefixqb_128B +class Hexagon_V65_v1024v128i_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v32i32_ty], [llvm_v1024i1_ty], + [IntrNoMem]>; + +// +// BUILTIN_INFO(HEXAGON.A6_vcmpbeq_notany,QI_ftype_DIDI,2) +// tag : A6_vcmpbeq_notany +def int_hexagon_A6_vcmpbeq_notany : +Hexagon_V65_iLLiLLi_Intrinsic<"HEXAGON_A6_vcmpbeq_notany">; + +// +// BUILTIN_INFO(HEXAGON.A6_vcmpbeq_notany_128B,QI_ftype_DIDI,2) +// tag : A6_vcmpbeq_notany_128B +def int_hexagon_A6_vcmpbeq_notany_128B : +Hexagon_V65_iLLiLLi_Intrinsic<"HEXAGON_A6_vcmpbeq_notany_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vrmpyub_rtt,VD_ftype_VIDI,2) +// tag : V6_vrmpyub_rtt +def int_hexagon_V6_vrmpyub_rtt : +Hexagon_V65_v1024v512LLi_Intrinsic<"HEXAGON_V6_vrmpyub_rtt">; + +// +// BUILTIN_INFO(HEXAGON.V6_vrmpyub_rtt_128B,VD_ftype_VIDI,2) +// tag : V6_vrmpyub_rtt_128B +def int_hexagon_V6_vrmpyub_rtt_128B : +Hexagon_V65_v2048v1024LLi_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vrmpyub_rtt_acc,VD_ftype_VDVIDI,3) +// tag : V6_vrmpyub_rtt_acc +def int_hexagon_V6_vrmpyub_rtt_acc : +Hexagon_V65_v1024v1024v512LLi_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vrmpyub_rtt_acc_128B,VD_ftype_VDVIDI,3) +// tag : V6_vrmpyub_rtt_acc_128B +def int_hexagon_V6_vrmpyub_rtt_acc_128B : +Hexagon_V65_v2048v2048v1024LLi_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vrmpybub_rtt,VD_ftype_VIDI,2) +// tag : V6_vrmpybub_rtt +def int_hexagon_V6_vrmpybub_rtt : +Hexagon_V65_v1024v512LLi_Intrinsic<"HEXAGON_V6_vrmpybub_rtt">; + +// +// BUILTIN_INFO(HEXAGON.V6_vrmpybub_rtt_128B,VD_ftype_VIDI,2) +// tag : V6_vrmpybub_rtt_128B 
+def int_hexagon_V6_vrmpybub_rtt_128B : +Hexagon_V65_v2048v1024LLi_Intrinsic<"HEXAGON_V6_vrmpybub_rtt_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vrmpybub_rtt_acc,VD_ftype_VDVIDI,3) +// tag : V6_vrmpybub_rtt_acc +def int_hexagon_V6_vrmpybub_rtt_acc : +Hexagon_V65_v1024v1024v512LLi_Intrinsic<"HEXAGON_V6_vrmpybub_rtt_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vrmpybub_rtt_acc_128B,VD_ftype_VDVIDI,3) +// tag : V6_vrmpybub_rtt_acc_128B +def int_hexagon_V6_vrmpybub_rtt_acc_128B : +Hexagon_V65_v2048v2048v1024LLi_Intrinsic<"HEXAGON_V6_vrmpybub_rtt_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vasruwuhsat,VI_ftype_VIVISI,3) +// tag : V6_vasruwuhsat +def int_hexagon_V6_vasruwuhsat : +Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vasruwuhsat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vasruwuhsat_128B,VI_ftype_VIVISI,3) +// tag : V6_vasruwuhsat_128B +def int_hexagon_V6_vasruwuhsat_128B : +Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasruwuhsat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vasruhubsat,VI_ftype_VIVISI,3) +// tag : V6_vasruhubsat +def int_hexagon_V6_vasruhubsat : +Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vasruhubsat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vasruhubsat_128B,VI_ftype_VIVISI,3) +// tag : V6_vasruhubsat_128B +def int_hexagon_V6_vasruhubsat_128B : +Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasruhubsat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vasruhubrndsat,VI_ftype_VIVISI,3) +// tag : V6_vasruhubrndsat +def int_hexagon_V6_vasruhubrndsat : +Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vasruhubrndsat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vasruhubrndsat_128B,VI_ftype_VIVISI,3) +// tag : V6_vasruhubrndsat_128B +def int_hexagon_V6_vasruhubrndsat_128B : +Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasruhubrndsat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vaslh_acc,VI_ftype_VIVISI,3) +// tag : V6_vaslh_acc +def int_hexagon_V6_vaslh_acc : +Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vaslh_acc">; + +// +// 
BUILTIN_INFO(HEXAGON.V6_vaslh_acc_128B,VI_ftype_VIVISI,3) +// tag : V6_vaslh_acc_128B +def int_hexagon_V6_vaslh_acc_128B : +Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vaslh_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vasrh_acc,VI_ftype_VIVISI,3) +// tag : V6_vasrh_acc +def int_hexagon_V6_vasrh_acc : +Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrh_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vasrh_acc_128B,VI_ftype_VIVISI,3) +// tag : V6_vasrh_acc_128B +def int_hexagon_V6_vasrh_acc_128B : +Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrh_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vavguw,VI_ftype_VIVI,2) +// tag : V6_vavguw +def int_hexagon_V6_vavguw : +Hexagon_V65_v512v512v512_Intrinsic<"HEXAGON_V6_vavguw">; + +// +// BUILTIN_INFO(HEXAGON.V6_vavguw_128B,VI_ftype_VIVI,2) +// tag : V6_vavguw_128B +def int_hexagon_V6_vavguw_128B : +Hexagon_V65_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavguw_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vavguwrnd,VI_ftype_VIVI,2) +// tag : V6_vavguwrnd +def int_hexagon_V6_vavguwrnd : +Hexagon_V65_v512v512v512_Intrinsic<"HEXAGON_V6_vavguwrnd">; + +// +// BUILTIN_INFO(HEXAGON.V6_vavguwrnd_128B,VI_ftype_VIVI,2) +// tag : V6_vavguwrnd_128B +def int_hexagon_V6_vavguwrnd_128B : +Hexagon_V65_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavguwrnd_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vavgb,VI_ftype_VIVI,2) +// tag : V6_vavgb +def int_hexagon_V6_vavgb : +Hexagon_V65_v512v512v512_Intrinsic<"HEXAGON_V6_vavgb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vavgb_128B,VI_ftype_VIVI,2) +// tag : V6_vavgb_128B +def int_hexagon_V6_vavgb_128B : +Hexagon_V65_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vavgbrnd,VI_ftype_VIVI,2) +// tag : V6_vavgbrnd +def int_hexagon_V6_vavgbrnd : +Hexagon_V65_v512v512v512_Intrinsic<"HEXAGON_V6_vavgbrnd">; + +// +// BUILTIN_INFO(HEXAGON.V6_vavgbrnd_128B,VI_ftype_VIVI,2) +// tag : V6_vavgbrnd_128B +def int_hexagon_V6_vavgbrnd_128B : 
+Hexagon_V65_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgbrnd_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vnavgb,VI_ftype_VIVI,2) +// tag : V6_vnavgb +def int_hexagon_V6_vnavgb : +Hexagon_V65_v512v512v512_Intrinsic<"HEXAGON_V6_vnavgb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vnavgb_128B,VI_ftype_VIVI,2) +// tag : V6_vnavgb_128B +def int_hexagon_V6_vnavgb_128B : +Hexagon_V65_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vnavgb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vabsb,VI_ftype_VI,1) +// tag : V6_vabsb +def int_hexagon_V6_vabsb : +Hexagon_V65_v512v512_Intrinsic<"HEXAGON_V6_vabsb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vabsb_128B,VI_ftype_VI,1) +// tag : V6_vabsb_128B +def int_hexagon_V6_vabsb_128B : +Hexagon_V65_v1024v1024_Intrinsic<"HEXAGON_V6_vabsb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vabsb_sat,VI_ftype_VI,1) +// tag : V6_vabsb_sat +def int_hexagon_V6_vabsb_sat : +Hexagon_V65_v512v512_Intrinsic<"HEXAGON_V6_vabsb_sat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vabsb_sat_128B,VI_ftype_VI,1) +// tag : V6_vabsb_sat_128B +def int_hexagon_V6_vabsb_sat_128B : +Hexagon_V65_v1024v1024_Intrinsic<"HEXAGON_V6_vabsb_sat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpabuu,VD_ftype_VDSI,2) +// tag : V6_vmpabuu +def int_hexagon_V6_vmpabuu : +Hexagon_V65_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpabuu">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpabuu_128B,VD_ftype_VDSI,2) +// tag : V6_vmpabuu_128B +def int_hexagon_V6_vmpabuu_128B : +Hexagon_V65_v2048v2048i_Intrinsic<"HEXAGON_V6_vmpabuu_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpabuu_acc,VD_ftype_VDVDSI,3) +// tag : V6_vmpabuu_acc +def int_hexagon_V6_vmpabuu_acc : +Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpabuu_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpabuu_acc_128B,VD_ftype_VDVDSI,3) +// tag : V6_vmpabuu_acc_128B +def int_hexagon_V6_vmpabuu_acc_128B : +Hexagon_V65_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vmpabuu_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpyh_acc,VD_ftype_VDVISI,3) +// tag : V6_vmpyh_acc +def int_hexagon_V6_vmpyh_acc : 
+Hexagon_V65_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpyh_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpyh_acc_128B,VD_ftype_VDVISI,3) +// tag : V6_vmpyh_acc_128B +def int_hexagon_V6_vmpyh_acc_128B : +Hexagon_V65_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyh_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpahhsat,VI_ftype_VIVIDI,3) +// tag : V6_vmpahhsat +def int_hexagon_V6_vmpahhsat : +Hexagon_V65_v512v512v512LLi_Intrinsic<"HEXAGON_V6_vmpahhsat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpahhsat_128B,VI_ftype_VIVIDI,3) +// tag : V6_vmpahhsat_128B +def int_hexagon_V6_vmpahhsat_128B : +Hexagon_V65_v1024v1024v1024LLi_Intrinsic<"HEXAGON_V6_vmpahhsat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpauhuhsat,VI_ftype_VIVIDI,3) +// tag : V6_vmpauhuhsat +def int_hexagon_V6_vmpauhuhsat : +Hexagon_V65_v512v512v512LLi_Intrinsic<"HEXAGON_V6_vmpauhuhsat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpauhuhsat_128B,VI_ftype_VIVIDI,3) +// tag : V6_vmpauhuhsat_128B +def int_hexagon_V6_vmpauhuhsat_128B : +Hexagon_V65_v1024v1024v1024LLi_Intrinsic<"HEXAGON_V6_vmpauhuhsat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpsuhuhsat,VI_ftype_VIVIDI,3) +// tag : V6_vmpsuhuhsat +def int_hexagon_V6_vmpsuhuhsat : +Hexagon_V65_v512v512v512LLi_Intrinsic<"HEXAGON_V6_vmpsuhuhsat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpsuhuhsat_128B,VI_ftype_VIVIDI,3) +// tag : V6_vmpsuhuhsat_128B +def int_hexagon_V6_vmpsuhuhsat_128B : +Hexagon_V65_v1024v1024v1024LLi_Intrinsic<"HEXAGON_V6_vmpsuhuhsat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlut4,VI_ftype_VIDI,2) +// tag : V6_vlut4 +def int_hexagon_V6_vlut4 : +Hexagon_V65_v512v512LLi_Intrinsic<"HEXAGON_V6_vlut4">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlut4_128B,VI_ftype_VIDI,2) +// tag : V6_vlut4_128B +def int_hexagon_V6_vlut4_128B : +Hexagon_V65_v1024v1024LLi_Intrinsic<"HEXAGON_V6_vlut4_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpyuhe,VI_ftype_VISI,2) +// tag : V6_vmpyuhe +def int_hexagon_V6_vmpyuhe : +Hexagon_V65_v512v512i_Intrinsic<"HEXAGON_V6_vmpyuhe">; + +// +// 
BUILTIN_INFO(HEXAGON.V6_vmpyuhe_128B,VI_ftype_VISI,2) +// tag : V6_vmpyuhe_128B +def int_hexagon_V6_vmpyuhe_128B : +Hexagon_V65_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyuhe_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpyuhe_acc,VI_ftype_VIVISI,3) +// tag : V6_vmpyuhe_acc +def int_hexagon_V6_vmpyuhe_acc : +Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vmpyuhe_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpyuhe_acc_128B,VI_ftype_VIVISI,3) +// tag : V6_vmpyuhe_acc_128B +def int_hexagon_V6_vmpyuhe_acc_128B : +Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyuhe_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vprefixqb,VI_ftype_QV,1) +// tag : V6_vprefixqb +def int_hexagon_V6_vprefixqb : +Hexagon_V65_v512v64i_Intrinsic<"HEXAGON_V6_vprefixqb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vprefixqb_128B,VI_ftype_QV,1) +// tag : V6_vprefixqb_128B +def int_hexagon_V6_vprefixqb_128B : +Hexagon_V65_v1024v128i_Intrinsic<"HEXAGON_V6_vprefixqb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vprefixqh,VI_ftype_QV,1) +// tag : V6_vprefixqh +def int_hexagon_V6_vprefixqh : +Hexagon_V65_v512v64i_Intrinsic<"HEXAGON_V6_vprefixqh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vprefixqh_128B,VI_ftype_QV,1) +// tag : V6_vprefixqh_128B +def int_hexagon_V6_vprefixqh_128B : +Hexagon_V65_v1024v128i_Intrinsic<"HEXAGON_V6_vprefixqh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vprefixqw,VI_ftype_QV,1) +// tag : V6_vprefixqw +def int_hexagon_V6_vprefixqw : +Hexagon_V65_v512v64i_Intrinsic<"HEXAGON_V6_vprefixqw">; + +// +// BUILTIN_INFO(HEXAGON.V6_vprefixqw_128B,VI_ftype_QV,1) +// tag : V6_vprefixqw_128B +def int_hexagon_V6_vprefixqw_128B : +Hexagon_V65_v1024v128i_Intrinsic<"HEXAGON_V6_vprefixqw_128B">; + + +// The scatter/gather ones below will not be generated from iset.py. Make sure +// you don't overwrite these. 
+class Hexagon_V65_vvmemiiv512_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [], [llvm_ptr_ty,llvm_i32_ty,llvm_i32_ty, + llvm_v16i32_ty], + [IntrArgMemOnly]>; + +class Hexagon_V65_vvmemiiv1024_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [], [llvm_ptr_ty,llvm_i32_ty,llvm_i32_ty, + llvm_v32i32_ty], + [IntrArgMemOnly]>; + +class Hexagon_V65_vvmemiiv2048_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [], [llvm_ptr_ty,llvm_i32_ty,llvm_i32_ty, + llvm_v64i32_ty], + [IntrArgMemOnly]>; + +class Hexagon_V65_vvmemv64iiiv512_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [], [llvm_ptr_ty,llvm_v512i1_ty,llvm_i32_ty, + llvm_i32_ty,llvm_v16i32_ty], + [IntrArgMemOnly]>; + +class Hexagon_V65_vvmemv128iiiv1024_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [], [llvm_ptr_ty,llvm_v1024i1_ty,llvm_i32_ty, + llvm_i32_ty,llvm_v32i32_ty], + [IntrArgMemOnly]>; + +class Hexagon_V65_vvmemv64iiiv1024_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [], [llvm_ptr_ty,llvm_v512i1_ty,llvm_i32_ty, + llvm_i32_ty,llvm_v32i32_ty], + [IntrArgMemOnly]>; + +class Hexagon_V65_vvmemv128iiiv2048_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [], [llvm_ptr_ty,llvm_v1024i1_ty,llvm_i32_ty, + llvm_i32_ty,llvm_v64i32_ty], + [IntrArgMemOnly]>; + +def int_hexagon_V6_vgathermw : +Hexagon_V65_vvmemiiv512_Intrinsic<"HEXAGON_V6_vgathermw">; + +def int_hexagon_V6_vgathermw_128B : +Hexagon_V65_vvmemiiv1024_Intrinsic<"HEXAGON_V6_vgathermw_128B">; + +def int_hexagon_V6_vgathermh : +Hexagon_V65_vvmemiiv512_Intrinsic<"HEXAGON_V6_vgathermh">; + +def int_hexagon_V6_vgathermh_128B : +Hexagon_V65_vvmemiiv1024_Intrinsic<"HEXAGON_V6_vgathermh_128B">; + +def int_hexagon_V6_vgathermhw : +Hexagon_V65_vvmemiiv1024_Intrinsic<"HEXAGON_V6_vgathermhw">; + +def int_hexagon_V6_vgathermhw_128B : +Hexagon_V65_vvmemiiv2048_Intrinsic<"HEXAGON_V6_vgathermhw_128B">; + +def 
int_hexagon_V6_vgathermwq : +Hexagon_V65_vvmemv64iiiv512_Intrinsic<"HEXAGON_V6_vgathermwq">; + +def int_hexagon_V6_vgathermwq_128B : +Hexagon_V65_vvmemv128iiiv1024_Intrinsic<"HEXAGON_V6_vgathermwq_128B">; + +def int_hexagon_V6_vgathermhq : +Hexagon_V65_vvmemv64iiiv512_Intrinsic<"HEXAGON_V6_vgathermhq">; + +def int_hexagon_V6_vgathermhq_128B : +Hexagon_V65_vvmemv128iiiv1024_Intrinsic<"HEXAGON_V6_vgathermhq_128B">; + +def int_hexagon_V6_vgathermhwq : +Hexagon_V65_vvmemv64iiiv1024_Intrinsic<"HEXAGON_V6_vgathermhwq">; + +def int_hexagon_V6_vgathermhwq_128B : +Hexagon_V65_vvmemv128iiiv2048_Intrinsic<"HEXAGON_V6_vgathermhwq_128B">; + +class Hexagon_V65_viiv512v512_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [], [llvm_i32_ty,llvm_i32_ty, + llvm_v16i32_ty,llvm_v16i32_ty], + [IntrWriteMem]>; + +class Hexagon_V65_viiv1024v1024_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [], [llvm_i32_ty,llvm_i32_ty, + llvm_v32i32_ty,llvm_v32i32_ty], + [IntrWriteMem]>; + +class Hexagon_V65_vv64iiiv512v512_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [], [llvm_v512i1_ty,llvm_i32_ty, + llvm_i32_ty,llvm_v16i32_ty, + llvm_v16i32_ty], + [IntrWriteMem]>; + +class Hexagon_V65_vv128iiiv1024v1024_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [], [llvm_v1024i1_ty,llvm_i32_ty, + llvm_i32_ty,llvm_v32i32_ty, + llvm_v32i32_ty], + [IntrWriteMem]>; + +class Hexagon_V65_viiv1024v512_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [], [llvm_i32_ty,llvm_i32_ty, + llvm_v32i32_ty,llvm_v16i32_ty], + [IntrWriteMem]>; + +class Hexagon_V65_viiv2048v1024_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [], [llvm_i32_ty,llvm_i32_ty, + llvm_v64i32_ty,llvm_v32i32_ty], + [IntrWriteMem]>; + +class Hexagon_V65_vv64iiiv1024v512_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [], [llvm_v512i1_ty,llvm_i32_ty, + llvm_i32_ty,llvm_v32i32_ty, + llvm_v16i32_ty], + 
[IntrWriteMem]>; + +class Hexagon_V65_vv128iiiv2048v1024_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [], [llvm_v1024i1_ty,llvm_i32_ty, + llvm_i32_ty,llvm_v64i32_ty, + llvm_v32i32_ty], + [IntrWriteMem]>; + +class Hexagon_V65_v2048_Intrinsic<string GCCIntSuffix> + : Hexagon_Intrinsic<GCCIntSuffix, + [llvm_v64i32_ty], [], + [IntrNoMem]>; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermw,v_ftype_SISIVIVI,4) +// tag : V6_vscattermw +def int_hexagon_V6_vscattermw : +Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermw">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermw_128B,v_ftype_SISIVIVI,4) +// tag : V6_vscattermw_128B +def int_hexagon_V6_vscattermw_128B : +Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermw_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermh,v_ftype_SISIVIVI,4) +// tag : V6_vscattermh +def int_hexagon_V6_vscattermh : +Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermh_128B,v_ftype_SISIVIVI,4) +// tag : V6_vscattermh_128B +def int_hexagon_V6_vscattermh_128B : +Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermw_add,v_ftype_SISIVIVI,4) +// tag : V6_vscattermw_add +def int_hexagon_V6_vscattermw_add : +Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermw_add">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermw_add_128B,v_ftype_SISIVIVI,4) +// tag : V6_vscattermw_add_128B +def int_hexagon_V6_vscattermw_add_128B : +Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermw_add_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermh_add,v_ftype_SISIVIVI,4) +// tag : V6_vscattermh_add +def int_hexagon_V6_vscattermh_add : +Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermh_add">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermh_add_128B,v_ftype_SISIVIVI,4) +// tag : V6_vscattermh_add_128B +def int_hexagon_V6_vscattermh_add_128B : +Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermh_add_128B">; + +// +// 
BUILTIN_INFO(HEXAGON.V6_vscattermwq,v_ftype_QVSISIVIVI,5) +// tag : V6_vscattermwq +def int_hexagon_V6_vscattermwq : +Hexagon_V65_vv64iiiv512v512_Intrinsic<"HEXAGON_V6_vscattermwq">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermwq_128B,v_ftype_QVSISIVIVI,5) +// tag : V6_vscattermwq_128B +def int_hexagon_V6_vscattermwq_128B : +Hexagon_V65_vv128iiiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermwq_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermhq,v_ftype_QVSISIVIVI,5) +// tag : V6_vscattermhq +def int_hexagon_V6_vscattermhq : +Hexagon_V65_vv64iiiv512v512_Intrinsic<"HEXAGON_V6_vscattermhq">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermhq_128B,v_ftype_QVSISIVIVI,5) +// tag : V6_vscattermhq_128B +def int_hexagon_V6_vscattermhq_128B : +Hexagon_V65_vv128iiiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermhq_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermhw,v_ftype_SISIVDVI,4) +// tag : V6_vscattermhw +def int_hexagon_V6_vscattermhw : +Hexagon_V65_viiv1024v512_Intrinsic<"HEXAGON_V6_vscattermhw">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermhw_128B,v_ftype_SISIVDVI,4) +// tag : V6_vscattermhw_128B +def int_hexagon_V6_vscattermhw_128B : +Hexagon_V65_viiv2048v1024_Intrinsic<"HEXAGON_V6_vscattermhw_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermhwq,v_ftype_QVSISIVDVI,5) +// tag : V6_vscattermhwq +def int_hexagon_V6_vscattermhwq : +Hexagon_V65_vv64iiiv1024v512_Intrinsic<"HEXAGON_V6_vscattermhwq">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermhwq_128B,v_ftype_QVSISIVDVI,5) +// tag : V6_vscattermhwq_128B +def int_hexagon_V6_vscattermhwq_128B : +Hexagon_V65_vv128iiiv2048v1024_Intrinsic<"HEXAGON_V6_vscattermhwq_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermhw_add,v_ftype_SISIVDVI,4) +// tag : V6_vscattermhw_add +def int_hexagon_V6_vscattermhw_add : +Hexagon_V65_viiv1024v512_Intrinsic<"HEXAGON_V6_vscattermhw_add">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermhw_add_128B,v_ftype_SISIVDVI,4) +// tag : V6_vscattermhw_add_128B +def int_hexagon_V6_vscattermhw_add_128B : 
+Hexagon_V65_viiv2048v1024_Intrinsic<"HEXAGON_V6_vscattermhw_add_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdd0,VD_ftype_,0) +// tag : V6_vdd0 +def int_hexagon_V6_vdd0 : +Hexagon_v1024_Intrinsic<"HEXAGON_V6_vdd0">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdd0_128B,VD_ftype_,0) +// tag : V6_vdd0_128B +def int_hexagon_V6_vdd0_128B : +Hexagon_V65_v2048_Intrinsic<"HEXAGON_V6_vdd0_128B">; diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td index 68f123df0430..73622ce9303f 100644 --- a/include/llvm/IR/IntrinsicsNVVM.td +++ b/include/llvm/IR/IntrinsicsNVVM.td @@ -682,11 +682,21 @@ let TargetPrefix = "nvvm" in { def int_nvvm_bitcast_d2ll : GCCBuiltin<"__nvvm_bitcast_d2ll">, Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; +// FNS -// Atomic not available as an llvm intrinsic. + def int_nvvm_fns : GCCBuiltin<"__nvvm_fns">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + +// Atomics not available as llvm intrinsics. def int_nvvm_atomic_load_add_f32 : Intrinsic<[llvm_float_ty], [LLVMAnyPointerType<llvm_float_ty>, llvm_float_ty], [IntrArgMemOnly, NoCapture<0>]>; + // Atomic add of f64 requires sm_60. 
+ def int_nvvm_atomic_load_add_f64 : Intrinsic<[llvm_double_ty], + [LLVMAnyPointerType<llvm_double_ty>, llvm_double_ty], + [IntrArgMemOnly, NoCapture<0>]>; + def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty], [LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty], [IntrArgMemOnly, NoCapture<0>]>; @@ -750,6 +760,17 @@ let TargetPrefix = "nvvm" in { def int_nvvm_bar_sync : Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>, GCCBuiltin<"__nvvm_bar_sync">; + def int_nvvm_bar_warp_sync : + Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>, + GCCBuiltin<"__nvvm_bar_warp_sync">; + + // barrier.sync id[, cnt] + def int_nvvm_barrier_sync : + Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>, + GCCBuiltin<"__nvvm_barrier_sync">; + def int_nvvm_barrier_sync_cnt : + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent]>, + GCCBuiltin<"__nvvm_barrier_sync_cnt">; // Membar def int_nvvm_membar_cta : GCCBuiltin<"__nvvm_membar_cta">, @@ -3700,40 +3721,308 @@ def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">; // shfl.down.b32 dest, val, offset, mask_and_clamp def int_nvvm_shfl_down_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrConvergent], "llvm.nvvm.shfl.down.i32">, + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.down.i32">, GCCBuiltin<"__nvvm_shfl_down_i32">; def int_nvvm_shfl_down_f32 : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrConvergent], "llvm.nvvm.shfl.down.f32">, + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.down.f32">, GCCBuiltin<"__nvvm_shfl_down_f32">; // shfl.up.b32 dest, val, offset, mask_and_clamp def int_nvvm_shfl_up_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrConvergent], "llvm.nvvm.shfl.up.i32">, + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.up.i32">, GCCBuiltin<"__nvvm_shfl_up_i32">; def int_nvvm_shfl_up_f32 : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, 
llvm_i32_ty], - [IntrNoMem, IntrConvergent], "llvm.nvvm.shfl.up.f32">, + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.up.f32">, GCCBuiltin<"__nvvm_shfl_up_f32">; // shfl.bfly.b32 dest, val, offset, mask_and_clamp def int_nvvm_shfl_bfly_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrConvergent], "llvm.nvvm.shfl.bfly.i32">, + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.bfly.i32">, GCCBuiltin<"__nvvm_shfl_bfly_i32">; def int_nvvm_shfl_bfly_f32 : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrConvergent], "llvm.nvvm.shfl.bfly.f32">, + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.bfly.f32">, GCCBuiltin<"__nvvm_shfl_bfly_f32">; // shfl.idx.b32 dest, val, lane, mask_and_clamp def int_nvvm_shfl_idx_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrConvergent], "llvm.nvvm.shfl.idx.i32">, + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.idx.i32">, GCCBuiltin<"__nvvm_shfl_idx_i32">; def int_nvvm_shfl_idx_f32 : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrConvergent], "llvm.nvvm.shfl.idx.f32">, + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.idx.f32">, GCCBuiltin<"__nvvm_shfl_idx_f32">; + +// Synchronizing shfl variants available in CUDA-9. +// On sm_70 these don't have to be convergent, so we may eventually want to +// implement non-convergent variant of this intrinsic. 
+ +// shfl.sync.down.b32 dest, threadmask, val, offset , mask_and_clamp +def int_nvvm_shfl_sync_down_i32 : + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.down.i32">, + GCCBuiltin<"__nvvm_shfl_sync_down_i32">; +def int_nvvm_shfl_sync_down_f32 : + Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty], + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.down.f32">, + GCCBuiltin<"__nvvm_shfl_sync_down_f32">; + +// shfl.sync.up.b32 dest, threadmask, val, offset, mask_and_clamp +def int_nvvm_shfl_sync_up_i32 : + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.up.i32">, + GCCBuiltin<"__nvvm_shfl_sync_up_i32">; +def int_nvvm_shfl_sync_up_f32 : + Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty], + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.up.f32">, + GCCBuiltin<"__nvvm_shfl_sync_up_f32">; + +// shfl.sync.bfly.b32 dest, threadmask, val, offset, mask_and_clamp +def int_nvvm_shfl_sync_bfly_i32 : + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.bfly.i32">, + GCCBuiltin<"__nvvm_shfl_sync_bfly_i32">; +def int_nvvm_shfl_sync_bfly_f32 : + Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty], + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.bfly.f32">, + GCCBuiltin<"__nvvm_shfl_sync_bfly_f32">; + +// shfl.sync.idx.b32 dest, threadmask, val, lane, mask_and_clamp +def int_nvvm_shfl_sync_idx_i32 : + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.idx.i32">, + GCCBuiltin<"__nvvm_shfl_sync_idx_i32">; +def int_nvvm_shfl_sync_idx_f32 : + Intrinsic<[llvm_float_ty], 
[llvm_i32_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty], + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.idx.f32">, + GCCBuiltin<"__nvvm_shfl_sync_idx_f32">; + +// +// VOTE +// + +// vote.all pred +def int_nvvm_vote_all : + Intrinsic<[llvm_i1_ty], [llvm_i1_ty], + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.all">, + GCCBuiltin<"__nvvm_vote_all">; +// vote.any pred +def int_nvvm_vote_any : + Intrinsic<[llvm_i1_ty], [llvm_i1_ty], + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.any">, + GCCBuiltin<"__nvvm_vote_any">; +// vote.uni pred +def int_nvvm_vote_uni : + Intrinsic<[llvm_i1_ty], [llvm_i1_ty], + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.uni">, + GCCBuiltin<"__nvvm_vote_uni">; +// vote.ballot pred +def int_nvvm_vote_ballot : + Intrinsic<[llvm_i32_ty], [llvm_i1_ty], + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.ballot">, + GCCBuiltin<"__nvvm_vote_ballot">; + +// +// VOTE.SYNC +// + +// vote.sync.all mask, pred +def int_nvvm_vote_all_sync : + Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty], + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.all.sync">, + GCCBuiltin<"__nvvm_vote_all_sync">; +// vote.sync.any mask, pred +def int_nvvm_vote_any_sync : + Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty], + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.any.sync">, + GCCBuiltin<"__nvvm_vote_any_sync">; +// vote.sync.uni mask, pred +def int_nvvm_vote_uni_sync : + Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty], + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.uni.sync">, + GCCBuiltin<"__nvvm_vote_uni_sync">; +// vote.sync.ballot mask, pred +def int_nvvm_vote_ballot_sync : + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i1_ty], + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.ballot.sync">, + GCCBuiltin<"__nvvm_vote_ballot_sync">; + +// +// MATCH.SYNC +// +// match.any.sync.b32 mask, value +def int_nvvm_match_any_sync_i32 : + Intrinsic<[llvm_i32_ty], 
[llvm_i32_ty, llvm_i32_ty], + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.any.sync.i32">, + GCCBuiltin<"__nvvm_match_any_sync_i32">; +// match.any.sync.b64 mask, value +def int_nvvm_match_any_sync_i64 : + Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty], + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.any.sync.i64">, + GCCBuiltin<"__nvvm_match_any_sync_i64">; + +// match.all instruction have two variants -- one returns a single value, another +// returns a pair {value, predicate}. We currently only implement the latter as +// that's the variant exposed by CUDA API. + +// match.all.sync.b32p mask, value +def int_nvvm_match_all_sync_i32p : + Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.all.sync.i32p">; +// match.all.sync.b64p mask, value +def int_nvvm_match_all_sync_i64p : + Intrinsic<[llvm_i64_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i64_ty], + [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.all.sync.i64p">; + +// +// WMMA instructions +// + +// WMMA.LOAD +class NVVM_WMMA_LD_ALSTS<string Abc, string Layout, string Space, + string Type, LLVMType regty, int WithStride> + : Intrinsic<!if(!eq(Abc#Type,"cf16"), + [regty, regty, regty, regty], + [regty, regty, regty, regty, + regty, regty, regty, regty]), + !if(WithStride, [llvm_ptr_ty, llvm_i32_ty], [llvm_ptr_ty]), + [], // Properties must be set during instantiation. 
+ "llvm.nvvm.wmma.load."#Abc#".sync."#Layout#".m16n16k16" + #Space + #!if(WithStride,".stride","") + #"."#Type>; + +multiclass NVVM_WMMA_LD_ALST<string Abc, string Layout, string Space, + string Type, LLVMType regty> { + def _stride: NVVM_WMMA_LD_ALSTS<Abc, Layout, Space, Type, regty, 1>; + def NAME : NVVM_WMMA_LD_ALSTS<Abc, Layout, Space, Type, regty, 0>; } + +multiclass NVVM_WMMA_LD_ALT<string Abc, string Layout, + string Type, LLVMType regty> { + defm _global: NVVM_WMMA_LD_ALST<Abc, Layout, ".global", Type, regty>; + defm _shared: NVVM_WMMA_LD_ALST<Abc, Layout, ".shared", Type, regty>; + defm NAME: NVVM_WMMA_LD_ALST<Abc, Layout, "", Type, regty>; +} + +multiclass NVVM_WMMA_LD_AT<string Abc, string Type, LLVMType regty> { + defm _row: NVVM_WMMA_LD_ALT<Abc, "row", Type, regty>; + defm _col: NVVM_WMMA_LD_ALT<Abc, "col", Type, regty>; +} + +// For some reason ReadOnly<N> and NoCapture<N> confuses tblgen if they are +// passed to Intrinsic<> form inside of a multiclass. Setting them globally +// outside of the multiclass works. +let IntrProperties = [IntrReadMem, IntrArgMemOnly, + ReadOnly<0>, NoCapture<0>] in { + defm int_nvvm_wmma_load_a_f16: NVVM_WMMA_LD_AT<"a", "f16", llvm_v2f16_ty>; + defm int_nvvm_wmma_load_b_f16: NVVM_WMMA_LD_AT<"b", "f16", llvm_v2f16_ty>; + defm int_nvvm_wmma_load_c_f16: NVVM_WMMA_LD_AT<"c", "f16", llvm_v2f16_ty>; + defm int_nvvm_wmma_load_c_f32: NVVM_WMMA_LD_AT<"c", "f32", llvm_float_ty>; +} + +// WMMA.STORE.D +class NVVM_WMMA_STD_LSTS<string Layout, string Space, + string Type, LLVMType regty, int WithStride, + // This is only used to create a typed empty array we + // need to pass to !if below. + list<LLVMType>Empty=[]> + : Intrinsic<[], + !listconcat( + [llvm_ptr_ty], + !if(!eq(Type,"f16"), + [regty, regty, regty, regty], + [regty, regty, regty, regty, + regty, regty, regty, regty]), + !if(WithStride, [llvm_i32_ty], Empty)), + [], // Properties must be set during instantiation. 
+ "llvm.nvvm.wmma.store.d.sync."#Layout + #".m16n16k16"#Space + #!if(WithStride,".stride","") + #"."#Type>; + +multiclass NVVM_WMMA_STD_LST<string Layout, string Space, + string Type, LLVMType regty> { + def _stride: NVVM_WMMA_STD_LSTS<Layout, Space, Type, regty, 1>; + def NAME: NVVM_WMMA_STD_LSTS<Layout, Space, Type, regty, 0>; +} + +multiclass NVVM_WMMA_STD_LT<string Layout, string Type, LLVMType regty> { + defm _global: NVVM_WMMA_STD_LST<Layout, ".global", Type, regty>; + defm _shared: NVVM_WMMA_STD_LST<Layout, ".shared", Type, regty>; + defm NAME: NVVM_WMMA_STD_LST<Layout, "", Type, regty>; +} + +multiclass NVVM_WMMA_STD_T<string Type, LLVMType regty> { + defm _row: NVVM_WMMA_STD_LT<"row", Type, regty>; + defm _col: NVVM_WMMA_STD_LT<"col", Type, regty>; +} + +let IntrProperties = [IntrWriteMem, IntrArgMemOnly, + WriteOnly<0>, NoCapture<0>] in { + defm int_nvvm_wmma_store_d_f16: NVVM_WMMA_STD_T<"f16", llvm_v2f16_ty>; + defm int_nvvm_wmma_store_d_f32: NVVM_WMMA_STD_T<"f32", llvm_float_ty>; +} + +// WMMA.MMA +class NVVM_WMMA_MMA_ABDCS<string ALayout, string BLayout, + string DType, LLVMType d_regty, + string CType, LLVMType c_regty, + string Satfinite = ""> + : Intrinsic<!if(!eq(DType,"f16"), + [d_regty, d_regty, d_regty, d_regty], + [d_regty, d_regty, d_regty, d_regty, + d_regty, d_regty, d_regty, d_regty]), + !listconcat( + [// A + llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, + llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, + // B + llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, + llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty], + !if(!eq(CType,"f16"), + [c_regty, c_regty, c_regty, c_regty], + [c_regty, c_regty, c_regty, c_regty, + c_regty, c_regty, c_regty, c_regty])), + [IntrNoMem], + "llvm.nvvm.wmma.mma.sync."#ALayout#"."#BLayout + #".m16n16k16."#DType#"."#CType#Satfinite>; + +multiclass NVVM_WMMA_MMA_ABDC<string ALayout, string BLayout, + string DType, LLVMType d_regty, + string CType, LLVMType c_regty> { 
+ def NAME : NVVM_WMMA_MMA_ABDCS<ALayout, BLayout, + DType, d_regty, + CType, c_regty>; + def _satfinite: NVVM_WMMA_MMA_ABDCS<ALayout, BLayout, + DType, d_regty, + CType, c_regty,".satfinite">; +} + +multiclass NVVM_WMMA_MMA_ABD<string ALayout, string BLayout, + string DType, LLVMType d_regty> { + defm _f16: NVVM_WMMA_MMA_ABDC<ALayout, BLayout, DType, d_regty, + "f16", llvm_v2f16_ty>; + defm _f32: NVVM_WMMA_MMA_ABDC<ALayout, BLayout, DType, d_regty, + "f32", llvm_float_ty>; +} + +multiclass NVVM_WMMA_MMA_AB<string ALayout, string BLayout> { + defm _f16: NVVM_WMMA_MMA_ABD<ALayout, BLayout, "f16", llvm_v2f16_ty>; + defm _f32: NVVM_WMMA_MMA_ABD<ALayout, BLayout, "f32", llvm_float_ty>; +} + +multiclass NVVM_WMMA_MMA_A<string ALayout> { + defm _col: NVVM_WMMA_MMA_AB<ALayout, "col">; + defm _row: NVVM_WMMA_MMA_AB<ALayout, "row">; +} + +defm int_nvvm_wmma_mma_sync_col: NVVM_WMMA_MMA_A<"col">; +defm int_nvvm_wmma_mma_sync_row: NVVM_WMMA_MMA_A<"row">; + +} // let TargetPrefix = "nvvm" diff --git a/include/llvm/IR/IntrinsicsSystemZ.td b/include/llvm/IR/IntrinsicsSystemZ.td index 98065bc51d99..caa2ec209a31 100644 --- a/include/llvm/IR/IntrinsicsSystemZ.td +++ b/include/llvm/IR/IntrinsicsSystemZ.td @@ -198,17 +198,17 @@ multiclass SystemZQuaternaryIntCCBHF { let TargetPrefix = "s390" in { def int_s390_tbegin : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], - [IntrNoDuplicate]>; + [IntrNoDuplicate, IntrWriteMem]>; def int_s390_tbegin_nofloat : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], - [IntrNoDuplicate]>; + [IntrNoDuplicate, IntrWriteMem]>; def int_s390_tbeginc : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], - [IntrNoDuplicate]>; + [IntrNoDuplicate, IntrWriteMem]>; def int_s390_tabort : Intrinsic<[], [llvm_i64_ty], - [IntrNoReturn, Throws]>; + [IntrNoReturn, Throws, IntrWriteMem]>; def int_s390_tend : GCCBuiltin<"__builtin_tend">, Intrinsic<[llvm_i32_ty], []>; @@ -217,7 +217,7 @@ let TargetPrefix = "s390" in { Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; def 
int_s390_ntstg : Intrinsic<[], [llvm_i64_ty, llvm_ptr64_ty], - [IntrArgMemOnly]>; + [IntrArgMemOnly, IntrWriteMem]>; def int_s390_ppa_txassist : GCCBuiltin<"__builtin_tx_assist">, Intrinsic<[], [llvm_i32_ty]>; @@ -260,9 +260,7 @@ let TargetPrefix = "s390" in { def int_s390_vstl : GCCBuiltin<"__builtin_s390_vstl">, Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty], - // In fact write-only but there's no property - // for that. - [IntrArgMemOnly]>; + [IntrArgMemOnly, IntrWriteMem]>; defm int_s390_vupl : SystemZUnaryExtBHWF<"vupl">; defm int_s390_vupll : SystemZUnaryExtBHF<"vupll">; @@ -413,9 +411,7 @@ let TargetPrefix = "s390" in { def int_s390_vstrl : GCCBuiltin<"__builtin_s390_vstrl">, Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty], - // In fact write-only but there's no property - // for that. - [IntrArgMemOnly]>; + [IntrArgMemOnly, IntrWriteMem]>; } //===----------------------------------------------------------------------===// diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 80c528768dc7..bd6177c5b3d9 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -64,6 +64,35 @@ let TargetPrefix = "x86" in { } //===----------------------------------------------------------------------===// +// CET SS +let TargetPrefix = "x86" in { + def int_x86_incsspd : GCCBuiltin<"__builtin_ia32_incsspd">, + Intrinsic<[], [llvm_i32_ty], []>; + def int_x86_incsspq : GCCBuiltin<"__builtin_ia32_incsspq">, + Intrinsic<[], [llvm_i64_ty], []>; + def int_x86_rdsspd : GCCBuiltin<"__builtin_ia32_rdsspd">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; + def int_x86_rdsspq : GCCBuiltin<"__builtin_ia32_rdsspq">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty], []>; + def int_x86_saveprevssp : GCCBuiltin<"__builtin_ia32_saveprevssp">, + Intrinsic<[], [], []>; + def int_x86_rstorssp : GCCBuiltin<"__builtin_ia32_rstorssp">, + Intrinsic<[], [llvm_ptr_ty], []>; + def int_x86_wrssd : GCCBuiltin<"__builtin_ia32_wrssd">, + 
Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], []>; + def int_x86_wrssq : GCCBuiltin<"__builtin_ia32_wrssq">, + Intrinsic<[], [llvm_i64_ty, llvm_ptr_ty], []>; + def int_x86_wrussd : GCCBuiltin<"__builtin_ia32_wrussd">, + Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], []>; + def int_x86_wrussq : GCCBuiltin<"__builtin_ia32_wrussq">, + Intrinsic<[], [llvm_i64_ty, llvm_ptr_ty], []>; + def int_x86_setssbsy : GCCBuiltin<"__builtin_ia32_setssbsy">, + Intrinsic<[], [], []>; + def int_x86_clrssbsy : GCCBuiltin<"__builtin_ia32_clrssbsy">, + Intrinsic<[], [llvm_ptr_ty], []>; +} + +//===----------------------------------------------------------------------===// // 3DNow! let TargetPrefix = "x86" in { @@ -379,12 +408,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd128">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem, Commutative]>; - def int_x86_sse2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb128">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, - llvm_v16i8_ty], [IntrNoMem, Commutative]>; - def int_x86_sse2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw128">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, - llvm_v8i16_ty], [IntrNoMem, Commutative]>; def int_x86_sse2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw128">, Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem, Commutative]>; @@ -664,18 +687,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_ssse3_pabs_b : GCCBuiltin<"__builtin_ia32_pabsb">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; - def int_x86_ssse3_pabs_b_128 : GCCBuiltin<"__builtin_ia32_pabsb128">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; def int_x86_ssse3_pabs_w : GCCBuiltin<"__builtin_ia32_pabsw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; - def int_x86_ssse3_pabs_w_128 : GCCBuiltin<"__builtin_ia32_pabsw128">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; def int_x86_ssse3_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; - def int_x86_ssse3_pabs_d_128 : GCCBuiltin<"__builtin_ia32_pabsd128">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// @@ -709,29 +726,68 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_aesni_aesimc : GCCBuiltin<"__builtin_ia32_aesimc128">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + def int_x86_aesni_aesenc : GCCBuiltin<"__builtin_ia32_aesenc128">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + def int_x86_aesni_aesenc_256 : GCCBuiltin<"__builtin_ia32_aesenc256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; + def int_x86_aesni_aesenc_512 : GCCBuiltin<"__builtin_ia32_aesenc512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty], + [IntrNoMem]>; + def int_x86_aesni_aesenclast : GCCBuiltin<"__builtin_ia32_aesenclast128">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + def int_x86_aesni_aesenclast_256 : + GCCBuiltin<"__builtin_ia32_aesenclast256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; + def int_x86_aesni_aesenclast_512 : + GCCBuiltin<"__builtin_ia32_aesenclast512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty], + [IntrNoMem]>; + def 
int_x86_aesni_aesdec : GCCBuiltin<"__builtin_ia32_aesdec128">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + def int_x86_aesni_aesdec_256 : GCCBuiltin<"__builtin_ia32_aesdec256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; + def int_x86_aesni_aesdec_512 : GCCBuiltin<"__builtin_ia32_aesdec512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty], + [IntrNoMem]>; + def int_x86_aesni_aesdeclast : GCCBuiltin<"__builtin_ia32_aesdeclast128">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + def int_x86_aesni_aesdeclast_256 : + GCCBuiltin<"__builtin_ia32_aesdeclast256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; + def int_x86_aesni_aesdeclast_512 : + GCCBuiltin<"__builtin_ia32_aesdeclast512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty], + [IntrNoMem]>; + def int_x86_aesni_aeskeygenassist : GCCBuiltin<"__builtin_ia32_aeskeygenassist128">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; } -// PCLMUL instruction +// PCLMUL instructions let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_pclmulqdq : GCCBuiltin<"__builtin_ia32_pclmulqdq128">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_pclmulqdq_256 : GCCBuiltin<"__builtin_ia32_pclmulqdq256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_pclmulqdq_512 : GCCBuiltin<"__builtin_ia32_pclmulqdq512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; } // Vector pack @@ -977,19 +1033,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
GCCBuiltin<"__builtin_ia32_vpermilvarps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty], [IntrNoMem]>; - def int_x86_avx_vperm2f128_pd_256 : - GCCBuiltin<"__builtin_ia32_vperm2f128_pd256">, - Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, - llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx_vperm2f128_ps_256 : - GCCBuiltin<"__builtin_ia32_vperm2f128_ps256">, - Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, - llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx_vperm2f128_si_256 : - GCCBuiltin<"__builtin_ia32_vperm2f128_si256">, - Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, - llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vpermi2var_d_128 : GCCBuiltin<"__builtin_ia32_vpermi2vard128_mask">, Intrinsic<[llvm_v4i32_ty], @@ -1325,52 +1368,56 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_shuf_f32x4_256 : - GCCBuiltin<"__builtin_ia32_shuf_f32x4_256_mask">, - Intrinsic<[llvm_v8f32_ty], - [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], - [IntrNoMem]>; +} - def int_x86_avx512_mask_shuf_f32x4 : - GCCBuiltin<"__builtin_ia32_shuf_f32x4_mask">, - Intrinsic<[llvm_v16f32_ty], - [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty], +// GFNI Instructions +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
+ def int_x86_vgf2p8affineinvqb_128 : + GCCBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v16qi">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_shuf_f64x2_256 : - GCCBuiltin<"__builtin_ia32_shuf_f64x2_256_mask">, - Intrinsic<[llvm_v4f64_ty], - [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], + def int_x86_vgf2p8affineinvqb_256 : + GCCBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v32qi">, + Intrinsic<[llvm_v32i8_ty], + [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_shuf_f64x2 : - GCCBuiltin<"__builtin_ia32_shuf_f64x2_mask">, - Intrinsic<[llvm_v8f64_ty], - [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty], + def int_x86_vgf2p8affineinvqb_512 : + GCCBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v64qi">, + Intrinsic<[llvm_v64i8_ty], + [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_shuf_i32x4_256 : - GCCBuiltin<"__builtin_ia32_shuf_i32x4_256_mask">, - Intrinsic<[llvm_v8i32_ty], - [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty], + def int_x86_vgf2p8affineqb_128 : + GCCBuiltin<"__builtin_ia32_vgf2p8affineqb_v16qi">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_shuf_i32x4 : - GCCBuiltin<"__builtin_ia32_shuf_i32x4_mask">, - Intrinsic<[llvm_v16i32_ty], - [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], + def int_x86_vgf2p8affineqb_256 : + GCCBuiltin<"__builtin_ia32_vgf2p8affineqb_v32qi">, + Intrinsic<[llvm_v32i8_ty], + [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_shuf_i64x2_256 : - GCCBuiltin<"__builtin_ia32_shuf_i64x2_256_mask">, - Intrinsic<[llvm_v4i64_ty], - [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty], + def int_x86_vgf2p8affineqb_512 : + GCCBuiltin<"__builtin_ia32_vgf2p8affineqb_v64qi">, + 
Intrinsic<[llvm_v64i8_ty], + [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_shuf_i64x2 : - GCCBuiltin<"__builtin_ia32_shuf_i64x2_mask">, - Intrinsic<[llvm_v8i64_ty], - [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], + def int_x86_vgf2p8mulb_128 : + GCCBuiltin<"__builtin_ia32_vgf2p8mulb_v16qi">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_vgf2p8mulb_256 : + GCCBuiltin<"__builtin_ia32_vgf2p8mulb_v32qi">, + Intrinsic<[llvm_v32i8_ty], + [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; + def int_x86_vgf2p8mulb_512 : + GCCBuiltin<"__builtin_ia32_vgf2p8mulb_v64qi">, + Intrinsic<[llvm_v64i8_ty], + [llvm_v64i8_ty, llvm_v64i8_ty], [IntrNoMem]>; } @@ -1464,80 +1511,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_ptestnzc_256 : GCCBuiltin<"__builtin_ia32_ptestnzc256">, Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; - def int_x86_avx512_ptestm_d_512 : GCCBuiltin<"__builtin_ia32_ptestmd512">, - Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, - llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_ptestm_q_512 : GCCBuiltin<"__builtin_ia32_ptestmq512">, - Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, - llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_ptestm_b_128 : GCCBuiltin<"__builtin_ia32_ptestmb128">, - Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, - llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_ptestm_b_256 : GCCBuiltin<"__builtin_ia32_ptestmb256">, - Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, - llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_ptestm_b_512 : GCCBuiltin<"__builtin_ia32_ptestmb512">, - Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, - llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; - def int_x86_avx512_ptestm_d_128 : GCCBuiltin<"__builtin_ia32_ptestmd128">, - Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty, - llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; - def 
int_x86_avx512_ptestm_d_256 : GCCBuiltin<"__builtin_ia32_ptestmd256">, - Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty, - llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_ptestm_q_128 : GCCBuiltin<"__builtin_ia32_ptestmq128">, - Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty, - llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_ptestm_q_256 : GCCBuiltin<"__builtin_ia32_ptestmq256">, - Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty, - llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_ptestm_w_128 : GCCBuiltin<"__builtin_ia32_ptestmw128">, - Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty, - llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_ptestm_w_256 : GCCBuiltin<"__builtin_ia32_ptestmw256">, - Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty, - llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_ptestm_w_512 : GCCBuiltin<"__builtin_ia32_ptestmw512">, - Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, - llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; - - def int_x86_avx512_ptestnm_b_128 : GCCBuiltin<"__builtin_ia32_ptestnmb128">, - Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, - llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_ptestnm_b_256 : GCCBuiltin<"__builtin_ia32_ptestnmb256">, - Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, - llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_ptestnm_b_512 : GCCBuiltin<"__builtin_ia32_ptestnmb512">, - Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, - llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; - def int_x86_avx512_ptestnm_d_128 : GCCBuiltin<"__builtin_ia32_ptestnmd128">, - Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty, - llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_ptestnm_d_256 : GCCBuiltin<"__builtin_ia32_ptestnmd256">, - Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty, - llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_ptestnm_d_512 : GCCBuiltin<"__builtin_ia32_ptestnmd512">, - Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, - llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; - def 
int_x86_avx512_ptestnm_q_128 : GCCBuiltin<"__builtin_ia32_ptestnmq128">, - Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty, - llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_ptestnm_q_256 : GCCBuiltin<"__builtin_ia32_ptestnmq256">, - Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty, - llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_ptestnm_q_512 : GCCBuiltin<"__builtin_ia32_ptestnmq512">, - Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, - llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_ptestnm_w_128 : GCCBuiltin<"__builtin_ia32_ptestnmw128">, - Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty, - llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_ptestnm_w_256 : GCCBuiltin<"__builtin_ia32_ptestnmw256">, - Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty, - llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_ptestnm_w_512 : GCCBuiltin<"__builtin_ia32_ptestnmw512">, - Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, - llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_fpclass_pd_128 : GCCBuiltin<"__builtin_ia32_fpclasspd128_mask">, @@ -1634,6 +1607,25 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". [IntrArgMemOnly]>; } +// BITALG bits shuffle +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
+ def int_x86_avx512_mask_vpshufbitqmb_128 : + GCCBuiltin<"__builtin_ia32_vpshufbitqmb128_mask">, + Intrinsic<[llvm_i16_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_vpshufbitqmb_256 : + GCCBuiltin<"__builtin_ia32_vpshufbitqmb256_mask">, + Intrinsic<[llvm_i32_ty], + [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_vpshufbitqmb_512 : + GCCBuiltin<"__builtin_ia32_vpshufbitqmb512_mask">, + Intrinsic<[llvm_i64_ty], + [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], + [IntrNoMem]>; +} + //===----------------------------------------------------------------------===// // AVX2 @@ -1678,12 +1670,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd256">, Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem, Commutative]>; - def int_x86_avx2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb256">, - Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, - llvm_v32i8_ty], [IntrNoMem, Commutative]>; - def int_x86_avx2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw256">, - Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, - llvm_v16i16_ty], [IntrNoMem, Commutative]>; def int_x86_avx2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw256">, Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem, Commutative]>; @@ -1841,88 +1827,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". llvm_v8i32_ty], [IntrNoMem]>; } -// Absolute value ops -let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
- def int_x86_avx2_pabs_b : GCCBuiltin<"__builtin_ia32_pabsb256">, - Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; - def int_x86_avx2_pabs_w : GCCBuiltin<"__builtin_ia32_pabsw256">, - Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; - def int_x86_avx2_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd256">, - Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_pabs_b_128 : - GCCBuiltin<"__builtin_ia32_pabsb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pabs_b_256 : - GCCBuiltin<"__builtin_ia32_pabsb256_mask">, - Intrinsic<[llvm_v32i8_ty], - [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pabs_b_512 : - GCCBuiltin<"__builtin_ia32_pabsb512_mask">, - Intrinsic<[llvm_v64i8_ty], - [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pabs_d_128 : - GCCBuiltin<"__builtin_ia32_pabsd128_mask">, - Intrinsic<[llvm_v4i32_ty], - [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pabs_d_256 : - GCCBuiltin<"__builtin_ia32_pabsd256_mask">, - Intrinsic<[llvm_v8i32_ty], - [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pabs_d_512 : - GCCBuiltin<"__builtin_ia32_pabsd512_mask">, - Intrinsic<[llvm_v16i32_ty], - [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pabs_q_128 : - GCCBuiltin<"__builtin_ia32_pabsq128_mask">, - Intrinsic<[llvm_v2i64_ty], - [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pabs_q_256 : - GCCBuiltin<"__builtin_ia32_pabsq256_mask">, - Intrinsic<[llvm_v4i64_ty], - [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pabs_q_512 : - GCCBuiltin<"__builtin_ia32_pabsq512_mask">, - Intrinsic<[llvm_v8i64_ty], - [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def 
int_x86_avx512_mask_pabs_w_128 : - GCCBuiltin<"__builtin_ia32_pabsw128_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pabs_w_256 : - GCCBuiltin<"__builtin_ia32_pabsw256_mask">, - Intrinsic<[llvm_v16i16_ty], - [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pabs_w_512 : - GCCBuiltin<"__builtin_ia32_pabsw512_mask">, - Intrinsic<[llvm_v32i16_ty], - [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], - [IntrNoMem]>; -} - // Horizontal arithmetic ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw256">, @@ -1984,65 +1888,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". llvm_v32i8_ty], [IntrNoMem]>; } -// Vector load with broadcast -let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx512_mask_pbroadcast_b_gpr_128 : - GCCBuiltin<"__builtin_ia32_pbroadcastb128_gpr_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_pbroadcast_b_gpr_256 : - GCCBuiltin<"__builtin_ia32_pbroadcastb256_gpr_mask">, - Intrinsic<[llvm_v32i8_ty], - [llvm_i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_pbroadcast_b_gpr_512 : - GCCBuiltin<"__builtin_ia32_pbroadcastb512_gpr_mask">, - Intrinsic<[llvm_v64i8_ty], - [llvm_i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_pbroadcast_w_gpr_128 : - GCCBuiltin<"__builtin_ia32_pbroadcastw128_gpr_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_pbroadcast_w_gpr_256 : - GCCBuiltin<"__builtin_ia32_pbroadcastw256_gpr_mask">, - Intrinsic<[llvm_v16i16_ty], - [llvm_i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_pbroadcast_w_gpr_512 : - 
GCCBuiltin<"__builtin_ia32_pbroadcastw512_gpr_mask">, - Intrinsic<[llvm_v32i16_ty], - [llvm_i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_pbroadcast_d_gpr_128 : - GCCBuiltin<"__builtin_ia32_pbroadcastd128_gpr_mask">, - Intrinsic<[llvm_v4i32_ty], - [llvm_i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_pbroadcast_d_gpr_256 : - GCCBuiltin<"__builtin_ia32_pbroadcastd256_gpr_mask">, - Intrinsic<[llvm_v8i32_ty], - [llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_pbroadcast_d_gpr_512 : - GCCBuiltin<"__builtin_ia32_pbroadcastd512_gpr_mask">, - Intrinsic<[llvm_v16i32_ty], - [llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_pbroadcast_q_gpr_128 : - GCCBuiltin<"__builtin_ia32_pbroadcastq128_gpr_mask">, - Intrinsic<[llvm_v2i64_ty], - [llvm_i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_pbroadcast_q_gpr_256 : - GCCBuiltin<"__builtin_ia32_pbroadcastq256_gpr_mask">, - Intrinsic<[llvm_v4i64_ty], - [llvm_i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_pbroadcast_q_gpr_512 : - GCCBuiltin<"__builtin_ia32_pbroadcastq512_gpr_mask">, - Intrinsic<[llvm_v8i64_ty], - [llvm_i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_pbroadcast_q_mem_512 : - GCCBuiltin<"__builtin_ia32_pbroadcastq512_mem_mask">, - Intrinsic<[llvm_v8i64_ty], - [llvm_i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; -} // Vector permutation let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". @@ -2052,9 +1897,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_avx2_permps : GCCBuiltin<"__builtin_ia32_permvarsf256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty], [IntrNoMem]>; - def int_x86_avx2_vperm2i128 : GCCBuiltin<"__builtin_ia32_permti256">, - Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, - llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; } // Conditional load ops @@ -2346,11 +2188,19 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // FMA3 and FMA4 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_fma_vfmadd_ss : GCCBuiltin<"__builtin_ia32_vfmaddss">, + def int_x86_fma_vfmadd_ss : GCCBuiltin<"__builtin_ia32_vfmaddss3">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_fma_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd3">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + def int_x86_fma4_vfmadd_ss : GCCBuiltin<"__builtin_ia32_vfmaddss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_fma_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd">, + def int_x86_fma4_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; @@ -2371,75 +2221,75 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_fma_vfmsub_ss : GCCBuiltin<"__builtin_ia32_vfmsubss">, + def int_x86_fma_vfmsub_ss : // TODO: remove this intrinsic Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_fma_vfmsub_sd : GCCBuiltin<"__builtin_ia32_vfmsubsd">, + def int_x86_fma_vfmsub_sd : // TODO: remove this intrinsic Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_fma_vfmsub_ps : GCCBuiltin<"__builtin_ia32_vfmsubps">, + def int_x86_fma_vfmsub_ps : // TODO: remove this intrinsic Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_fma_vfmsub_pd : GCCBuiltin<"__builtin_ia32_vfmsubpd">, + def int_x86_fma_vfmsub_pd : // TODO: remove this intrinsic Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_fma_vfmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubps256">, + def int_x86_fma_vfmsub_ps_256 : // TODO: remove this intrinsic Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; - def int_x86_fma_vfmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubpd256">, + def int_x86_fma_vfmsub_pd_256 : // TODO: remove this intrinsic Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_fma_vfnmadd_ss : GCCBuiltin<"__builtin_ia32_vfnmaddss">, + def int_x86_fma_vfnmadd_ss : // TODO: remove this intrinsic Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_fma_vfnmadd_sd : GCCBuiltin<"__builtin_ia32_vfnmaddsd">, + def int_x86_fma_vfnmadd_sd : // TODO: remove this intrinsic Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_fma_vfnmadd_ps : GCCBuiltin<"__builtin_ia32_vfnmaddps">, + def int_x86_fma_vfnmadd_ps : // TODO: remove this intrinsic Intrinsic<[llvm_v4f32_ty], 
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_fma_vfnmadd_pd : GCCBuiltin<"__builtin_ia32_vfnmaddpd">, + def int_x86_fma_vfnmadd_pd : // TODO: remove this intrinsic Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_fma_vfnmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmaddps256">, + def int_x86_fma_vfnmadd_ps_256 : // TODO: remove this intrinsic Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; - def int_x86_fma_vfnmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmaddpd256">, + def int_x86_fma_vfnmadd_pd_256 : // TODO: remove this intrinsic Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_fma_vfnmsub_ss : GCCBuiltin<"__builtin_ia32_vfnmsubss">, + def int_x86_fma_vfnmsub_ss : // TODO: remove this intrinsic Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_fma_vfnmsub_sd : GCCBuiltin<"__builtin_ia32_vfnmsubsd">, + def int_x86_fma_vfnmsub_sd : // TODO: remove this intrinsic Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_fma_vfnmsub_ps : GCCBuiltin<"__builtin_ia32_vfnmsubps">, + def int_x86_fma_vfnmsub_ps : // TODO: remove this intrinsic Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_fma_vfnmsub_pd : GCCBuiltin<"__builtin_ia32_vfnmsubpd">, + def int_x86_fma_vfnmsub_pd : // TODO: remove this intrinsic Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_fma_vfnmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmsubps256">, + def int_x86_fma_vfnmsub_ps_256 : // TODO: remove this intrinsic Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; - def int_x86_fma_vfnmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmsubpd256">, + def int_x86_fma_vfnmsub_pd_256 : // TODO: remove this 
intrinsic Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; @@ -2461,21 +2311,19 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_fma_vfmsubadd_ps : GCCBuiltin<"__builtin_ia32_vfmsubaddps">, + def int_x86_fma_vfmsubadd_ps : // TODO: remove this intrinsic Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_fma_vfmsubadd_pd : GCCBuiltin<"__builtin_ia32_vfmsubaddpd">, + def int_x86_fma_vfmsubadd_pd : // TODO: remove this intrinsic Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_fma_vfmsubadd_ps_256 : - GCCBuiltin<"__builtin_ia32_vfmsubaddps256">, + def int_x86_fma_vfmsubadd_ps_256 : // TODO: remove this intrinsic Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; - def int_x86_fma_vfmsubadd_pd_256 : - GCCBuiltin<"__builtin_ia32_vfmsubaddpd256">, + def int_x86_fma_vfmsubadd_pd_256 : // TODO: remove this intrinsic Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; @@ -2987,6 +2835,109 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; } +// VNNI +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
+ def int_x86_avx512_mask_vpdpbusd_128 : + GCCBuiltin<"__builtin_ia32_vpdpbusd128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpbusd_128 : + GCCBuiltin<"__builtin_ia32_vpdpbusd128_maskz">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpdpbusd_256 : + GCCBuiltin<"__builtin_ia32_vpdpbusd256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpbusd_256 : + GCCBuiltin<"__builtin_ia32_vpdpbusd256_maskz">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpdpbusd_512 : + GCCBuiltin<"__builtin_ia32_vpdpbusd512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpbusd_512 : + GCCBuiltin<"__builtin_ia32_vpdpbusd512_maskz">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vpdpbusds_128 : + GCCBuiltin<"__builtin_ia32_vpdpbusds128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpbusds_128 : + GCCBuiltin<"__builtin_ia32_vpdpbusds128_maskz">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpdpbusds_256 : + GCCBuiltin<"__builtin_ia32_vpdpbusds256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpbusds_256 : + GCCBuiltin<"__builtin_ia32_vpdpbusds256_maskz">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def 
int_x86_avx512_mask_vpdpbusds_512 : + GCCBuiltin<"__builtin_ia32_vpdpbusds512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpbusds_512 : + GCCBuiltin<"__builtin_ia32_vpdpbusds512_maskz">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vpdpwssd_128 : + GCCBuiltin<"__builtin_ia32_vpdpwssd128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpwssd_128 : + GCCBuiltin<"__builtin_ia32_vpdpwssd128_maskz">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpdpwssd_256 : + GCCBuiltin<"__builtin_ia32_vpdpwssd256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpwssd_256 : + GCCBuiltin<"__builtin_ia32_vpdpwssd256_maskz">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpdpwssd_512 : + GCCBuiltin<"__builtin_ia32_vpdpwssd512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpwssd_512 : + GCCBuiltin<"__builtin_ia32_vpdpwssd512_maskz">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vpdpwssds_128 : + GCCBuiltin<"__builtin_ia32_vpdpwssds128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpwssds_128 : + GCCBuiltin<"__builtin_ia32_vpdpwssds128_maskz">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def 
int_x86_avx512_mask_vpdpwssds_256 : + GCCBuiltin<"__builtin_ia32_vpdpwssds256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpwssds_256 : + GCCBuiltin<"__builtin_ia32_vpdpwssds256_maskz">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpdpwssds_512 : + GCCBuiltin<"__builtin_ia32_vpdpwssds512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpdpwssds_512 : + GCCBuiltin<"__builtin_ia32_vpdpwssds512_maskz">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; +} + //===----------------------------------------------------------------------===// // XOP @@ -3648,10 +3599,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". } //===----------------------------------------------------------------------===// -// CLFLUSHOPT +// CLFLUSHOPT and CLWB let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_clflushopt : GCCBuiltin<"__builtin_ia32_clflushopt">, Intrinsic<[], [llvm_ptr_ty], []>; + + def int_x86_clwb : GCCBuiltin<"__builtin_ia32_clwb">, + Intrinsic<[], [llvm_ptr_ty], []>; } //===----------------------------------------------------------------------===// @@ -3767,32 +3721,23 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
// Mask instructions // 16-bit mask - def int_x86_avx512_kand_w : GCCBuiltin<"__builtin_ia32_kandhi">, + def int_x86_avx512_kand_w : // TODO: remove this intrinsic Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_kandn_w : GCCBuiltin<"__builtin_ia32_kandnhi">, + def int_x86_avx512_kandn_w : // TODO: remove this intrinsic Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_knot_w : GCCBuiltin<"__builtin_ia32_knothi">, + def int_x86_avx512_knot_w : // TODO: remove this intrinsic Intrinsic<[llvm_i16_ty], [llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_kor_w : GCCBuiltin<"__builtin_ia32_korhi">, + def int_x86_avx512_kor_w : // TODO: remove this intrinsic Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_kxor_w : GCCBuiltin<"__builtin_ia32_kxorhi">, + def int_x86_avx512_kxor_w : // TODO: remove this intrinsic Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_kxnor_w : GCCBuiltin<"__builtin_ia32_kxnorhi">, + def int_x86_avx512_kxnor_w : // TODO: remove this intrinsic Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_kunpck_bw : GCCBuiltin<"__builtin_ia32_kunpckhi">, - Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_x86_avx512_kunpck_wd : GCCBuiltin<"__builtin_ia32_kunpcksi">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_avx512_kunpck_dq : GCCBuiltin<"__builtin_ia32_kunpckdi">, - Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], - [IntrNoMem]>; def int_x86_avx512_kortestz_w : GCCBuiltin<"__builtin_ia32_kortestzhi">, Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; @@ -4407,99 +4352,13 @@ def int_x86_avx512_mask_range_ps_512 : GCCBuiltin<"__builtin_ia32_rangeps512_mas // Vector load with broadcast let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
+ // TODO: Remove the broadcast intrinsics with no gcc builtin and autoupgrade def int_x86_avx512_vbroadcast_ss_512 : - GCCBuiltin<"__builtin_ia32_vbroadcastss512">, Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>; def int_x86_avx512_vbroadcast_sd_512 : - GCCBuiltin<"__builtin_ia32_vbroadcastsd512">, Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>; - def int_x86_avx512_mask_broadcastf32x2_256 : - GCCBuiltin<"__builtin_ia32_broadcastf32x2_256_mask">, - Intrinsic<[llvm_v8f32_ty], - [llvm_v4f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_broadcastf32x2_512 : - GCCBuiltin<"__builtin_ia32_broadcastf32x2_512_mask">, - Intrinsic<[llvm_v16f32_ty], - [llvm_v4f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_broadcasti32x2_128 : - GCCBuiltin<"__builtin_ia32_broadcasti32x2_128_mask">, - Intrinsic<[llvm_v4i32_ty], - [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_broadcasti32x2_256 : - GCCBuiltin<"__builtin_ia32_broadcasti32x2_256_mask">, - Intrinsic<[llvm_v8i32_ty], - [llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_broadcasti32x2_512 : - GCCBuiltin<"__builtin_ia32_broadcasti32x2_512_mask">, - Intrinsic<[llvm_v16i32_ty], - [llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_broadcastf32x4_256 : - GCCBuiltin<"__builtin_ia32_broadcastf32x4_256_mask">, - Intrinsic<[llvm_v8f32_ty], - [llvm_v4f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_broadcastf32x4_512 : - GCCBuiltin<"__builtin_ia32_broadcastf32x4_512">, - Intrinsic<[llvm_v16f32_ty], - [llvm_v4f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_broadcastf32x8_512 : - GCCBuiltin<"__builtin_ia32_broadcastf32x8_512_mask">, - Intrinsic<[llvm_v16f32_ty], - [llvm_v8f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; - - def 
int_x86_avx512_mask_broadcastf64x2_256 : - GCCBuiltin<"__builtin_ia32_broadcastf64x2_256_mask">, - Intrinsic<[llvm_v4f64_ty], - [llvm_v2f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_broadcastf64x2_512 : - GCCBuiltin<"__builtin_ia32_broadcastf64x2_512_mask">, - Intrinsic<[llvm_v8f64_ty], - [llvm_v2f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_broadcastf64x4_512 : - GCCBuiltin<"__builtin_ia32_broadcastf64x4_512">, - Intrinsic<[llvm_v8f64_ty], - [llvm_v4f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_broadcasti32x4_256 : - GCCBuiltin<"__builtin_ia32_broadcasti32x4_256_mask">, - Intrinsic<[llvm_v8i32_ty], - [llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_broadcasti32x4_512 : - GCCBuiltin<"__builtin_ia32_broadcasti32x4_512">, - Intrinsic<[llvm_v16i32_ty], - [llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_broadcasti32x8_512 : - GCCBuiltin<"__builtin_ia32_broadcasti32x8_512_mask">, - Intrinsic<[llvm_v16i32_ty], - [llvm_v8i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_broadcasti64x2_256 : - GCCBuiltin<"__builtin_ia32_broadcasti64x2_256_mask">, - Intrinsic<[llvm_v4i64_ty], - [llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_broadcasti64x2_512 : - GCCBuiltin<"__builtin_ia32_broadcasti64x2_512_mask">, - Intrinsic<[llvm_v8i64_ty], - [llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_broadcasti64x4_512 : - GCCBuiltin<"__builtin_ia32_broadcasti64x4_512">, - Intrinsic<[llvm_v8i64_ty], - [llvm_v4i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_broadcastmw_512 : GCCBuiltin<"__builtin_ia32_broadcastmw512">, Intrinsic<[llvm_v16i32_ty], [llvm_i16_ty], [IntrNoMem]>; @@ -5013,24 +4872,6 @@ let TargetPrefix = "x86" in { def int_x86_avx512_mask_pmulh_w_256 : 
GCCBuiltin<"__builtin_ia32_pmulhw256_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_pavg_b_512 : GCCBuiltin<"__builtin_ia32_pavgb512_mask">, - Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, - llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; - def int_x86_avx512_mask_pavg_w_512 : GCCBuiltin<"__builtin_ia32_pavgw512_mask">, - Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, - llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_pavg_b_128 : GCCBuiltin<"__builtin_ia32_pavgb128_mask">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, - llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_pavg_b_256 : GCCBuiltin<"__builtin_ia32_pavgb256_mask">, - Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, - llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_pavg_w_128 : GCCBuiltin<"__builtin_ia32_pavgw128_mask">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, - llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_pavg_w_256 : GCCBuiltin<"__builtin_ia32_pavgw256_mask">, - Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, - llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmaddw_d_128 : GCCBuiltin<"__builtin_ia32_pmaddwd128_mask">, Intrinsic<[llvm_v4i32_ty], @@ -5524,6 +5365,56 @@ let TargetPrefix = "x86" in { Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; + def int_x86_avx512_mask_compress_b_512 : + GCCBuiltin<"__builtin_ia32_compressqi512_mask">, + Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, + llvm_i64_ty], [IntrNoMem]>; + def int_x86_avx512_mask_compress_w_512 : + GCCBuiltin<"__builtin_ia32_compresshi512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_compress_b_256 : + GCCBuiltin<"__builtin_ia32_compressqi256_mask">, + 
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_compress_w_256 : + GCCBuiltin<"__builtin_ia32_compresshi256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, + llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_compress_b_128 : + GCCBuiltin<"__builtin_ia32_compressqi128_mask">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, + llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_compress_w_128 : + GCCBuiltin<"__builtin_ia32_compresshi128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_compress_store_b_512 : + GCCBuiltin<"__builtin_ia32_compressstoreqi512_mask">, + Intrinsic<[], [llvm_ptr_ty, llvm_v64i8_ty, + llvm_i64_ty], [IntrArgMemOnly]>; + def int_x86_avx512_mask_compress_store_w_512 : + GCCBuiltin<"__builtin_ia32_compressstorehi512_mask">, + Intrinsic<[], [llvm_ptr_ty, llvm_v32i16_ty, + llvm_i32_ty], [IntrArgMemOnly]>; + def int_x86_avx512_mask_compress_store_b_256 : + GCCBuiltin<"__builtin_ia32_compressstoreqi256_mask">, + Intrinsic<[], [llvm_ptr_ty, llvm_v32i8_ty, + llvm_i32_ty], [IntrArgMemOnly]>; + def int_x86_avx512_mask_compress_store_w_256 : + GCCBuiltin<"__builtin_ia32_compressstorehi256_mask">, + Intrinsic<[], [llvm_ptr_ty, llvm_v16i16_ty, + llvm_i16_ty], [IntrArgMemOnly]>; + def int_x86_avx512_mask_compress_store_b_128 : + GCCBuiltin<"__builtin_ia32_compressstoreqi128_mask">, + Intrinsic<[], [llvm_ptr_ty, llvm_v16i8_ty, + llvm_i16_ty], [IntrArgMemOnly]>; + def int_x86_avx512_mask_compress_store_w_128 : + GCCBuiltin<"__builtin_ia32_compressstorehi128_mask">, + Intrinsic<[], [llvm_ptr_ty, llvm_v8i16_ty, + llvm_i8_ty], [IntrArgMemOnly]>; + // expand def int_x86_avx512_mask_expand_ps_512 : GCCBuiltin<"__builtin_ia32_expandsf512_mask">, @@ -5625,6 +5516,304 @@ let TargetPrefix = "x86" in { Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrReadMem, 
IntrArgMemOnly]>; + def int_x86_avx512_mask_expand_b_512 : + GCCBuiltin<"__builtin_ia32_expandqi512_mask">, + Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, + llvm_i64_ty], [IntrNoMem]>; + def int_x86_avx512_mask_expand_w_512 : + GCCBuiltin<"__builtin_ia32_expandhi512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_expand_b_256 : + GCCBuiltin<"__builtin_ia32_expandqi256_mask">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_expand_w_256 : + GCCBuiltin<"__builtin_ia32_expandhi256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, + llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_expand_b_128 : + GCCBuiltin<"__builtin_ia32_expandqi128_mask">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, + llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_expand_w_128 : + GCCBuiltin<"__builtin_ia32_expandhi128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_expand_load_b_512 : + GCCBuiltin<"__builtin_ia32_expandloadqi512_mask">, + Intrinsic<[llvm_v64i8_ty], [llvm_ptr_ty, llvm_v64i8_ty, + llvm_i64_ty], [IntrReadMem, IntrArgMemOnly]>; + def int_x86_avx512_mask_expand_load_w_512 : + GCCBuiltin<"__builtin_ia32_expandloadhi512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_ptr_ty, llvm_v32i16_ty, + llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; + def int_x86_avx512_mask_expand_load_b_256 : + GCCBuiltin<"__builtin_ia32_expandloadqi256_mask">, + Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_v32i8_ty, + llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; + def int_x86_avx512_mask_expand_load_w_256 : + GCCBuiltin<"__builtin_ia32_expandloadhi256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_ptr_ty, llvm_v16i16_ty, + llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>; + def int_x86_avx512_mask_expand_load_b_128 : + 
GCCBuiltin<"__builtin_ia32_expandloadqi128_mask">, + Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_v16i8_ty, + llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>; + def int_x86_avx512_mask_expand_load_w_128 : + GCCBuiltin<"__builtin_ia32_expandloadhi128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_v8i16_ty, + llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>; +} + +// VBMI2 Concat & Shift +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx512_mask_vpshld_q_512 : + GCCBuiltin<"__builtin_ia32_vpshldq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshld_q_256 : + GCCBuiltin<"__builtin_ia32_vpshldq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v4i64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshld_q_128 : + GCCBuiltin<"__builtin_ia32_vpshldq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v2i64_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vpshld_d_512 : + GCCBuiltin<"__builtin_ia32_vpshldd512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_v16i32_ty, + llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshld_d_256 : + GCCBuiltin<"__builtin_ia32_vpshldd256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v8i32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshld_d_128 : + GCCBuiltin<"__builtin_ia32_vpshldd128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vpshld_w_512 : + GCCBuiltin<"__builtin_ia32_vpshldw512_mask">, + Intrinsic<[llvm_v32i16_ty], + [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_v32i16_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshld_w_256 : + 
GCCBuiltin<"__builtin_ia32_vpshldw256_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty, llvm_v16i16_ty, + llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshld_w_128 : + GCCBuiltin<"__builtin_ia32_vpshldw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty, llvm_v8i16_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vpshrd_q_512 : + GCCBuiltin<"__builtin_ia32_vpshrdq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshrd_q_256 : + GCCBuiltin<"__builtin_ia32_vpshrdq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v4i64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshrd_q_128 : + GCCBuiltin<"__builtin_ia32_vpshrdq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v2i64_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vpshrd_d_512 : + GCCBuiltin<"__builtin_ia32_vpshrdd512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_v16i32_ty, + llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshrd_d_256 : + GCCBuiltin<"__builtin_ia32_vpshrdd256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v8i32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshrd_d_128 : + GCCBuiltin<"__builtin_ia32_vpshrdd128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vpshrd_w_512 : + GCCBuiltin<"__builtin_ia32_vpshrdw512_mask">, + Intrinsic<[llvm_v32i16_ty], + [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_v32i16_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshrd_w_256 : + GCCBuiltin<"__builtin_ia32_vpshrdw256_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v16i16_ty, 
llvm_v16i16_ty, llvm_i32_ty, llvm_v16i16_ty, + llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshrd_w_128 : + GCCBuiltin<"__builtin_ia32_vpshrdw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty, llvm_v8i16_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vpshldv_w_128 : + GCCBuiltin<"__builtin_ia32_vpshldvw128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpshldv_w_128 : + GCCBuiltin<"__builtin_ia32_vpshldvw128_maskz">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshldv_w_256 : + GCCBuiltin<"__builtin_ia32_vpshldvw256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, + llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpshldv_w_256 : + GCCBuiltin<"__builtin_ia32_vpshldvw256_maskz">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, + llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshldv_w_512 : + GCCBuiltin<"__builtin_ia32_vpshldvw512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, + llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpshldv_w_512 : + GCCBuiltin<"__builtin_ia32_vpshldvw512_maskz">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, + llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vpshldv_q_128 : + GCCBuiltin<"__builtin_ia32_vpshldvq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpshldv_q_128 : + GCCBuiltin<"__builtin_ia32_vpshldvq128_maskz">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshldv_q_256 : + GCCBuiltin<"__builtin_ia32_vpshldvq256_mask">, + Intrinsic<[llvm_v4i64_ty], 
[llvm_v4i64_ty, llvm_v4i64_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpshldv_q_256 : + GCCBuiltin<"__builtin_ia32_vpshldvq256_maskz">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshldv_q_512 : + GCCBuiltin<"__builtin_ia32_vpshldvq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, + llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpshldv_q_512 : + GCCBuiltin<"__builtin_ia32_vpshldvq512_maskz">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, + llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vpshldv_d_128 : + GCCBuiltin<"__builtin_ia32_vpshldvd128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpshldv_d_128 : + GCCBuiltin<"__builtin_ia32_vpshldvd128_maskz">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshldv_d_256 : + GCCBuiltin<"__builtin_ia32_vpshldvd256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpshldv_d_256 : + GCCBuiltin<"__builtin_ia32_vpshldvd256_maskz">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshldv_d_512 : + GCCBuiltin<"__builtin_ia32_vpshldvd512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpshldv_d_512 : + GCCBuiltin<"__builtin_ia32_vpshldvd512_maskz">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vpshrdv_w_128 : + GCCBuiltin<"__builtin_ia32_vpshrdvw128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, 
llvm_v8i16_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpshrdv_w_128 : + GCCBuiltin<"__builtin_ia32_vpshrdvw128_maskz">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshrdv_w_256 : + GCCBuiltin<"__builtin_ia32_vpshrdvw256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, + llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpshrdv_w_256 : + GCCBuiltin<"__builtin_ia32_vpshrdvw256_maskz">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, + llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshrdv_w_512 : + GCCBuiltin<"__builtin_ia32_vpshrdvw512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, + llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpshrdv_w_512 : + GCCBuiltin<"__builtin_ia32_vpshrdvw512_maskz">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, + llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vpshrdv_q_128 : + GCCBuiltin<"__builtin_ia32_vpshrdvq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpshrdv_q_128 : + GCCBuiltin<"__builtin_ia32_vpshrdvq128_maskz">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshrdv_q_256 : + GCCBuiltin<"__builtin_ia32_vpshrdvq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpshrdv_q_256 : + GCCBuiltin<"__builtin_ia32_vpshrdvq256_maskz">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshrdv_q_512 : + GCCBuiltin<"__builtin_ia32_vpshrdvq512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, + 
llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpshrdv_q_512 : + GCCBuiltin<"__builtin_ia32_vpshrdvq512_maskz">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, + llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_vpshrdv_d_128 : + GCCBuiltin<"__builtin_ia32_vpshrdvd128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpshrdv_d_128 : + GCCBuiltin<"__builtin_ia32_vpshrdvd128_maskz">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshrdv_d_256 : + GCCBuiltin<"__builtin_ia32_vpshrdvd256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpshrdv_d_256 : + GCCBuiltin<"__builtin_ia32_vpshrdvd256_maskz">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpshrdv_d_512 : + GCCBuiltin<"__builtin_ia32_vpshrdvd512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_maskz_vpshrdv_d_512 : + GCCBuiltin<"__builtin_ia32_vpshrdvd512_maskz">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, + llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; } // truncate diff --git a/include/llvm/IR/LLVMContext.h b/include/llvm/IR/LLVMContext.h index 4cb77701f762..a95634d32c21 100644 --- a/include/llvm/IR/LLVMContext.h +++ b/include/llvm/IR/LLVMContext.h @@ -16,6 +16,7 @@ #define LLVM_IR_LLVMCONTEXT_H #include "llvm-c/Types.h" +#include "llvm/IR/DiagnosticHandler.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Options.h" #include <cstdint> @@ -99,6 +100,8 @@ public: MD_section_prefix = 20, // "section_prefix" MD_absolute_symbol = 21, // "absolute_symbol" MD_associated = 22, // "associated" + 
MD_callees = 23, // "callees" + MD_irr_loop = 24, // "irr_loop" }; /// Known operand bundle tag IDs, which always have the same value. All @@ -167,11 +170,6 @@ public: using InlineAsmDiagHandlerTy = void (*)(const SMDiagnostic&, void *Context, unsigned LocCookie); - /// Defines the type of a diagnostic handler. - /// \see LLVMContext::setDiagnosticHandler. - /// \see LLVMContext::diagnose. - using DiagnosticHandlerTy = void (*)(const DiagnosticInfo &DI, void *Context); - /// Defines the type of a yield callback. /// \see LLVMContext::setYieldCallback. using YieldCallbackTy = void (*)(LLVMContext *Context, void *OpaqueHandle); @@ -194,26 +192,43 @@ public: /// setInlineAsmDiagnosticHandler. void *getInlineAsmDiagnosticContext() const; - /// setDiagnosticHandler - This method sets a handler that is invoked - /// when the backend needs to report anything to the user. The first - /// argument is a function pointer and the second is a context pointer that - /// gets passed into the DiagHandler. The third argument should be set to + /// setDiagnosticHandlerCallBack - This method sets a handler call back + /// that is invoked when the backend needs to report anything to the user. + /// The first argument is a function pointer and the second is a context pointer + /// that gets passed into the DiagHandler. The third argument should be set to /// true if the handler only expects enabled diagnostics. /// /// LLVMContext doesn't take ownership or interpret either of these /// pointers. - void setDiagnosticHandler(DiagnosticHandlerTy DiagHandler, - void *DiagContext = nullptr, + void setDiagnosticHandlerCallBack( + DiagnosticHandler::DiagnosticHandlerTy DiagHandler, + void *DiagContext = nullptr, bool RespectFilters = false); + + /// setDiagnosticHandler - This method sets unique_ptr to object of DiagnosticHandler + /// to provide custom diagnostic handling. The first argument is unique_ptr of object + /// of type DiagnosticHandler or of a class derived from it.
The second argument should be + /// set to true if the handler only expects enabled diagnostics. + /// + /// Ownership of this pointer is moved to LLVMContextImpl. + void setDiagnosticHandler(std::unique_ptr<DiagnosticHandler> &&DH, bool RespectFilters = false); - /// getDiagnosticHandler - Return the diagnostic handler set by - /// setDiagnosticHandler. - DiagnosticHandlerTy getDiagnosticHandler() const; + /// getDiagnosticHandlerCallBack - Return the diagnostic handler call back set by + /// setDiagnosticHandlerCallBack. + DiagnosticHandler::DiagnosticHandlerTy getDiagnosticHandlerCallBack() const; /// getDiagnosticContext - Return the diagnostic context set by /// setDiagnosticContext. void *getDiagnosticContext() const; + /// getDiagHandlerPtr - Returns const raw pointer of DiagnosticHandler set by + /// setDiagnosticHandler. + const DiagnosticHandler *getDiagHandlerPtr() const; + + /// getDiagnosticHandler - transfers ownership of DiagnosticHandler unique_ptr + /// to caller. + std::unique_ptr<DiagnosticHandler> getDiagnosticHandler(); + /// \brief Return if a code hotness metric should be included in optimization /// diagnostics. bool getDiagnosticsHotnessRequested() const; diff --git a/include/llvm/IR/LegacyPassManagers.h b/include/llvm/IR/LegacyPassManagers.h index b22f9302298d..3dc4a776dba0 100644 --- a/include/llvm/IR/LegacyPassManagers.h +++ b/include/llvm/IR/LegacyPassManagers.h @@ -279,11 +279,11 @@ private: // when we have multiple instances of the same pass since they'll usually // have the same analysis usage and can share storage. FoldingSet<AUFoldingSetNode> UniqueAnalysisUsages; - + // Allocator used for allocating UAFoldingSetNodes. This handles deletion of // all allocated nodes in one fell swoop. SpecificBumpPtrAllocator<AUFoldingSetNode> AUFoldingSetNodeAllocator; - + // Maps from a pass to it's associated entry in UniqueAnalysisUsages. Does // not own the storage associated with either key or value..
DenseMap<Pass *, AnalysisUsage*> AnUsageMap; diff --git a/include/llvm/IR/MDBuilder.h b/include/llvm/IR/MDBuilder.h index 899976a87bc7..dff1ca12407f 100644 --- a/include/llvm/IR/MDBuilder.h +++ b/include/llvm/IR/MDBuilder.h @@ -30,6 +30,7 @@ class Constant; class ConstantAsMetadata; class MDNode; class MDString; +class Metadata; class MDBuilder { LLVMContext &Context; @@ -85,6 +86,14 @@ public: MDNode *createRange(Constant *Lo, Constant *Hi); //===------------------------------------------------------------------===// + // Callees metadata. + //===------------------------------------------------------------------===// + + /// \brief Return metadata indicating the possible callees of indirect + /// calls. + MDNode *createCallees(ArrayRef<Function *> Callees); + + //===------------------------------------------------------------------===// // AA metadata. //===------------------------------------------------------------------===// @@ -141,9 +150,9 @@ public: struct TBAAStructField { uint64_t Offset; uint64_t Size; - MDNode *TBAA; - TBAAStructField(uint64_t Offset, uint64_t Size, MDNode *TBAA) : - Offset(Offset), Size(Size), TBAA(TBAA) {} + MDNode *Type; + TBAAStructField(uint64_t Offset, uint64_t Size, MDNode *Type) : + Offset(Offset), Size(Size), Type(Type) {} }; /// \brief Return metadata for a tbaa.struct node with the given @@ -165,6 +174,23 @@ public: /// base type, access type and offset relative to the base type. MDNode *createTBAAStructTagNode(MDNode *BaseType, MDNode *AccessType, uint64_t Offset, bool IsConstant = false); + + /// \brief Return metadata for a TBAA type node in the TBAA type DAG with the + /// given parent type, size in bytes, type identifier and a list of fields. 
+ MDNode *createTBAATypeNode(MDNode *Parent, uint64_t Size, Metadata *Id, + ArrayRef<TBAAStructField> Fields = + ArrayRef<TBAAStructField>()); + + /// \brief Return metadata for a TBAA access tag with the given base type, + /// final access type, offset of the access relative to the base type, size of + /// the access and flag indicating whether the accessed object can be + /// considered immutable for the purposes of the TBAA analysis. + MDNode *createTBAAAccessTag(MDNode *BaseType, MDNode *AccessType, + uint64_t Offset, uint64_t Size, + bool IsImmutable = false); + + /// \brief Return metadata containing an irreducible loop header weight. + MDNode *createIrrLoopHeaderWeight(uint64_t Weight); }; } // end namespace llvm diff --git a/include/llvm/IR/Metadata.h b/include/llvm/IR/Metadata.h index 3462cc02fd27..bc0b87a6c348 100644 --- a/include/llvm/IR/Metadata.h +++ b/include/llvm/IR/Metadata.h @@ -958,6 +958,9 @@ public: /// \pre No operands (or operands' operands, etc.) have \a isTemporary(). void resolveCycles(); + /// Resolve a unique, unresolved node. + void resolve(); + /// \brief Replace a temporary node with a permanent one. /// /// Try to create a uniqued version of \c N -- in place, if possible -- and @@ -1009,9 +1012,6 @@ protected: private: void handleChangedOperand(void *Ref, Metadata *New); - /// Resolve a unique, unresolved node. - void resolve(); - /// Drop RAUW support, if any. void dropReplaceableUses(); @@ -1188,7 +1188,8 @@ void TempMDNodeDeleter::operator()(MDNode *Node) const { /// particular Metadata subclass. 
template <class T> class TypedMDOperandIterator - : std::iterator<std::input_iterator_tag, T *, std::ptrdiff_t, void, T *> { + : public std::iterator<std::input_iterator_tag, T *, std::ptrdiff_t, void, + T *> { MDNode::op_iterator I = nullptr; public: @@ -1302,7 +1303,13 @@ public: if (!Use) return; *Use = MD; - Use = nullptr; + + if (*Use) + MetadataTracking::track(*Use); + + Metadata *T = cast<Metadata>(this); + MetadataTracking::untrack(T); + assert(!Use && "Use is still being tracked despite being untracked!"); } }; diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h index 4aa8a0199ab1..dd7a0db83774 100644 --- a/include/llvm/IR/ModuleSummaryIndex.h +++ b/include/llvm/IR/ModuleSummaryIndex.h @@ -148,11 +148,15 @@ public: /// In combined summary, indicate that the global value is live. unsigned Live : 1; + /// Indicates that the linker resolved the symbol to a definition from + /// within the same linkage unit. + unsigned DSOLocal : 1; + /// Convenience Constructors explicit GVFlags(GlobalValue::LinkageTypes Linkage, - bool NotEligibleToImport, bool Live) + bool NotEligibleToImport, bool Live, bool IsLocal) : Linkage(Linkage), NotEligibleToImport(NotEligibleToImport), - Live(Live) {} + Live(Live), DSOLocal(IsLocal) {} }; private: @@ -185,7 +189,10 @@ private: protected: GlobalValueSummary(SummaryKind K, GVFlags Flags, std::vector<ValueInfo> Refs) - : Kind(K), Flags(Flags), RefEdgeList(std::move(Refs)) {} + : Kind(K), Flags(Flags), RefEdgeList(std::move(Refs)) { + assert((K != AliasKind || Refs.empty()) && + "Expect no references for AliasSummary"); + } public: virtual ~GlobalValueSummary() = default; @@ -226,12 +233,21 @@ public: void setLive(bool Live) { Flags.Live = Live; } + void setDSOLocal(bool Local) { Flags.DSOLocal = Local; } + + bool isDSOLocal() const { return Flags.DSOLocal; } + /// Flag that this global value cannot be imported. 
void setNotEligibleToImport() { Flags.NotEligibleToImport = true; } /// Return the list of values referenced by this global value definition. ArrayRef<ValueInfo> refs() const { return RefEdgeList; } + /// If this is an alias summary, returns the summary of the aliased object (a + /// global variable or function), otherwise returns itself. + GlobalValueSummary *getBaseObject(); + const GlobalValueSummary *getBaseObject() const; + friend class ModuleSummaryIndex; friend void computeDeadSymbols(class ModuleSummaryIndex &, const DenseSet<GlobalValue::GUID> &); @@ -240,10 +256,14 @@ public: /// \brief Alias summary information. class AliasSummary : public GlobalValueSummary { GlobalValueSummary *AliaseeSummary; + // AliaseeGUID is only set and accessed when we are building a combined index + // via the BitcodeReader. + GlobalValue::GUID AliaseeGUID; public: - AliasSummary(GVFlags Flags, std::vector<ValueInfo> Refs) - : GlobalValueSummary(AliasKind, Flags, std::move(Refs)) {} + AliasSummary(GVFlags Flags) + : GlobalValueSummary(AliasKind, Flags, ArrayRef<ValueInfo>{}), + AliaseeSummary(nullptr), AliaseeGUID(0) {} /// Check if this is an alias summary. 
static bool classof(const GlobalValueSummary *GVS) { @@ -251,6 +271,7 @@ public: } void setAliasee(GlobalValueSummary *Aliasee) { AliaseeSummary = Aliasee; } + void setAliaseeGUID(GlobalValue::GUID GUID) { AliaseeGUID = GUID; } const GlobalValueSummary &getAliasee() const { assert(AliaseeSummary && "Unexpected missing aliasee summary"); @@ -261,8 +282,24 @@ public: return const_cast<GlobalValueSummary &>( static_cast<const AliasSummary *>(this)->getAliasee()); } + const GlobalValue::GUID &getAliaseeGUID() const { + assert(AliaseeGUID && "Unexpected missing aliasee GUID"); + return AliaseeGUID; + } }; +const inline GlobalValueSummary *GlobalValueSummary::getBaseObject() const { + if (auto *AS = dyn_cast<AliasSummary>(this)) + return &AS->getAliasee(); + return this; +} + +inline GlobalValueSummary *GlobalValueSummary::getBaseObject() { + if (auto *AS = dyn_cast<AliasSummary>(this)) + return &AS->getAliasee(); + return this; +} + /// \brief Function summary information to aid decisions and implementation of /// importing. class FunctionSummary : public GlobalValueSummary { @@ -273,7 +310,7 @@ public: /// An "identifier" for a virtual function. This contains the type identifier /// represented as a GUID and the offset from the address point to the virtual /// function pointer, where "address point" is as defined in the Itanium ABI: - /// https://mentorembedded.github.io/cxx-abi/abi.html#vtable-general + /// https://itanium-cxx-abi.github.io/cxx-abi/abi.html#vtable-general struct VFuncId { GlobalValue::GUID GUID; uint64_t Offset; @@ -287,11 +324,24 @@ public: std::vector<uint64_t> Args; }; + /// Function attribute flags. Used to track if a function accesses memory, + /// recurses or aliases. + struct FFlags { + unsigned ReadNone : 1; + unsigned ReadOnly : 1; + unsigned NoRecurse : 1; + unsigned ReturnDoesNotAlias : 1; + }; + private: /// Number of instructions (ignoring debug instructions, e.g.) 
computed /// during the initial compile step when the summary index is first built. unsigned InstCount; + /// Function attribute flags. Used to track if a function accesses memory, + /// recurses or aliases. + FFlags FunFlags; + /// List of <CalleeValueInfo, CalleeInfo> call edge pairs from this function. std::vector<EdgeTy> CallGraphEdgeList; @@ -317,15 +367,16 @@ private: std::unique_ptr<TypeIdInfo> TIdInfo; public: - FunctionSummary(GVFlags Flags, unsigned NumInsts, std::vector<ValueInfo> Refs, - std::vector<EdgeTy> CGEdges, + FunctionSummary(GVFlags Flags, unsigned NumInsts, FFlags FunFlags, + std::vector<ValueInfo> Refs, std::vector<EdgeTy> CGEdges, std::vector<GlobalValue::GUID> TypeTests, std::vector<VFuncId> TypeTestAssumeVCalls, std::vector<VFuncId> TypeCheckedLoadVCalls, std::vector<ConstVCall> TypeTestAssumeConstVCalls, std::vector<ConstVCall> TypeCheckedLoadConstVCalls) : GlobalValueSummary(FunctionKind, Flags, std::move(Refs)), - InstCount(NumInsts), CallGraphEdgeList(std::move(CGEdges)) { + InstCount(NumInsts), FunFlags(FunFlags), + CallGraphEdgeList(std::move(CGEdges)) { if (!TypeTests.empty() || !TypeTestAssumeVCalls.empty() || !TypeCheckedLoadVCalls.empty() || !TypeTestAssumeConstVCalls.empty() || !TypeCheckedLoadConstVCalls.empty()) @@ -341,6 +392,9 @@ public: return GVS->getSummaryKind() == FunctionKind; } + /// Get function attribute flags. + FFlags &fflags() { return FunFlags; } + /// Get the instruction count recorded for this function. unsigned instCount() const { return InstCount; } @@ -470,6 +524,16 @@ struct TypeTestResolution { /// range [1,256], this number will be 8. This helps generate the most compact /// instruction sequences. unsigned SizeM1BitWidth = 0; + + // The following fields are only used if the target does not support the use + // of absolute symbols to store constants. Their meanings are the same as the + // corresponding fields in LowerTypeTestsModule::TypeIdLowering in + // LowerTypeTests.cpp. 
+ + uint64_t AlignLog2 = 0; + uint64_t SizeM1 = 0; + uint8_t BitMask = 0; + uint64_t InlineBits = 0; }; struct WholeProgramDevirtResolution { @@ -493,6 +557,12 @@ struct WholeProgramDevirtResolution { /// - UniqueRetVal: the return value associated with the unique vtable (0 or /// 1). uint64_t Info = 0; + + // The following fields are only used if the target does not support the use + // of absolute symbols to store constants. + + uint32_t Byte = 0; + uint32_t Bit = 0; }; /// Resolutions for calls with all constant integer arguments (excluding the @@ -697,7 +767,8 @@ public: static std::string getGlobalNameForLocal(StringRef Name, ModuleHash ModHash) { SmallString<256> NewName(Name); NewName += ".llvm."; - NewName += utohexstr(ModHash[0]); // Take the first 32 bits + NewName += utostr((uint64_t(ModHash[0]) << 32) | + ModHash[1]); // Take the first 64 bits return NewName.str(); } diff --git a/include/llvm/IR/ModuleSummaryIndexYAML.h b/include/llvm/IR/ModuleSummaryIndexYAML.h index 7f6cb5bee5a6..4687f2d53e7e 100644 --- a/include/llvm/IR/ModuleSummaryIndexYAML.h +++ b/include/llvm/IR/ModuleSummaryIndexYAML.h @@ -30,6 +30,10 @@ template <> struct MappingTraits<TypeTestResolution> { static void mapping(IO &io, TypeTestResolution &res) { io.mapOptional("Kind", res.TheKind); io.mapOptional("SizeM1BitWidth", res.SizeM1BitWidth); + io.mapOptional("AlignLog2", res.AlignLog2); + io.mapOptional("SizeM1", res.SizeM1); + io.mapOptional("BitMask", res.BitMask); + io.mapOptional("InlineBits", res.InlineBits); } }; @@ -51,6 +55,8 @@ template <> struct MappingTraits<WholeProgramDevirtResolution::ByArg> { static void mapping(IO &io, WholeProgramDevirtResolution::ByArg &res) { io.mapOptional("Kind", res.TheKind); io.mapOptional("Info", res.Info); + io.mapOptional("Byte", res.Byte); + io.mapOptional("Bit", res.Bit); } }; @@ -129,7 +135,7 @@ template <> struct MappingTraits<TypeIdSummary> { struct FunctionSummaryYaml { unsigned Linkage; - bool NotEligibleToImport, Live; + bool 
NotEligibleToImport, Live, IsLocal; std::vector<uint64_t> TypeTests; std::vector<FunctionSummary::VFuncId> TypeTestAssumeVCalls, TypeCheckedLoadVCalls; @@ -171,6 +177,7 @@ template <> struct MappingTraits<FunctionSummaryYaml> { io.mapOptional("Linkage", summary.Linkage); io.mapOptional("NotEligibleToImport", summary.NotEligibleToImport); io.mapOptional("Live", summary.Live); + io.mapOptional("Local", summary.IsLocal); io.mapOptional("TypeTests", summary.TypeTests); io.mapOptional("TypeTestAssumeVCalls", summary.TypeTestAssumeVCalls); io.mapOptional("TypeCheckedLoadVCalls", summary.TypeCheckedLoadVCalls); @@ -205,9 +212,10 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> { Elem.SummaryList.push_back(llvm::make_unique<FunctionSummary>( GlobalValueSummary::GVFlags( static_cast<GlobalValue::LinkageTypes>(FSum.Linkage), - FSum.NotEligibleToImport, FSum.Live), - 0, ArrayRef<ValueInfo>{}, ArrayRef<FunctionSummary::EdgeTy>{}, - std::move(FSum.TypeTests), std::move(FSum.TypeTestAssumeVCalls), + FSum.NotEligibleToImport, FSum.Live, FSum.IsLocal), + 0, FunctionSummary::FFlags{}, ArrayRef<ValueInfo>{}, + ArrayRef<FunctionSummary::EdgeTy>{}, std::move(FSum.TypeTests), + std::move(FSum.TypeTestAssumeVCalls), std::move(FSum.TypeCheckedLoadVCalls), std::move(FSum.TypeTestAssumeConstVCalls), std::move(FSum.TypeCheckedLoadConstVCalls))); @@ -221,7 +229,8 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> { FSums.push_back(FunctionSummaryYaml{ FSum->flags().Linkage, static_cast<bool>(FSum->flags().NotEligibleToImport), - static_cast<bool>(FSum->flags().Live), FSum->type_tests(), + static_cast<bool>(FSum->flags().Live), + static_cast<bool>(FSum->flags().DSOLocal), FSum->type_tests(), FSum->type_test_assume_vcalls(), FSum->type_checked_load_vcalls(), FSum->type_test_assume_const_vcalls(), FSum->type_checked_load_const_vcalls()}); diff --git a/include/llvm/IR/Operator.h b/include/llvm/IR/Operator.h index 9df6bfc54cd4..01746e4b6a29 100644 --- 
a/include/llvm/IR/Operator.h +++ b/include/llvm/IR/Operator.h @@ -61,9 +61,9 @@ public: } }; -/// Utility class for integer arithmetic operators which may exhibit overflow - -/// Add, Sub, and Mul. It does not include SDiv, despite that operator having -/// the potential for overflow. +/// Utility class for integer operators which may exhibit overflow - Add, Sub, +/// Mul, and Shl. It does not include SDiv, despite that operator having the +/// potential for overflow. class OverflowingBinaryOperator : public Operator { public: enum { @@ -163,52 +163,61 @@ private: unsigned Flags = 0; - FastMathFlags(unsigned F) : Flags(F) { } + FastMathFlags(unsigned F) { + // If all 7 bits are set, turn this into -1. If the number of bits grows, + // this must be updated. This is intended to provide some forward binary + // compatibility insurance for the meaning of 'fast' in case bits are added. + if (F == 0x7F) Flags = ~0U; + else Flags = F; + } public: - /// This is how the bits are used in Value::SubclassOptionalData so they - /// should fit there too. + // This is how the bits are used in Value::SubclassOptionalData so they + // should fit there too. + // WARNING: We're out of space. SubclassOptionalData only has 7 bits. New + // functionality will require a change in how this information is stored. 
enum { - UnsafeAlgebra = (1 << 0), + AllowReassoc = (1 << 0), NoNaNs = (1 << 1), NoInfs = (1 << 2), NoSignedZeros = (1 << 3), AllowReciprocal = (1 << 4), - AllowContract = (1 << 5) + AllowContract = (1 << 5), + ApproxFunc = (1 << 6) }; FastMathFlags() = default; - /// Whether any flag is set bool any() const { return Flags != 0; } + bool none() const { return Flags == 0; } + bool all() const { return Flags == ~0U; } - /// Set all the flags to false void clear() { Flags = 0; } + void set() { Flags = ~0U; } /// Flag queries + bool allowReassoc() const { return 0 != (Flags & AllowReassoc); } bool noNaNs() const { return 0 != (Flags & NoNaNs); } bool noInfs() const { return 0 != (Flags & NoInfs); } bool noSignedZeros() const { return 0 != (Flags & NoSignedZeros); } bool allowReciprocal() const { return 0 != (Flags & AllowReciprocal); } - bool allowContract() const { return 0 != (Flags & AllowContract); } - bool unsafeAlgebra() const { return 0 != (Flags & UnsafeAlgebra); } + bool allowContract() const { return 0 != (Flags & AllowContract); } + bool approxFunc() const { return 0 != (Flags & ApproxFunc); } + /// 'Fast' means all bits are set. + bool isFast() const { return all(); } /// Flag setters + void setAllowReassoc() { Flags |= AllowReassoc; } void setNoNaNs() { Flags |= NoNaNs; } void setNoInfs() { Flags |= NoInfs; } void setNoSignedZeros() { Flags |= NoSignedZeros; } void setAllowReciprocal() { Flags |= AllowReciprocal; } + // TODO: Change the other set* functions to take a parameter? 
void setAllowContract(bool B) { Flags = (Flags & ~AllowContract) | B * AllowContract; } - void setUnsafeAlgebra() { - Flags |= UnsafeAlgebra; - setNoNaNs(); - setNoInfs(); - setNoSignedZeros(); - setAllowReciprocal(); - setAllowContract(true); - } + void setApproxFunc() { Flags |= ApproxFunc; } + void setFast() { set(); } void operator&=(const FastMathFlags &OtherFlags) { Flags &= OtherFlags.Flags; @@ -221,18 +230,21 @@ class FPMathOperator : public Operator { private: friend class Instruction; - void setHasUnsafeAlgebra(bool B) { + /// 'Fast' means all bits are set. + void setFast(bool B) { + setHasAllowReassoc(B); + setHasNoNaNs(B); + setHasNoInfs(B); + setHasNoSignedZeros(B); + setHasAllowReciprocal(B); + setHasAllowContract(B); + setHasApproxFunc(B); + } + + void setHasAllowReassoc(bool B) { SubclassOptionalData = - (SubclassOptionalData & ~FastMathFlags::UnsafeAlgebra) | - (B * FastMathFlags::UnsafeAlgebra); - - // Unsafe algebra implies all the others - if (B) { - setHasNoNaNs(true); - setHasNoInfs(true); - setHasNoSignedZeros(true); - setHasAllowReciprocal(true); - } + (SubclassOptionalData & ~FastMathFlags::AllowReassoc) | + (B * FastMathFlags::AllowReassoc); } void setHasNoNaNs(bool B) { @@ -265,6 +277,12 @@ private: (B * FastMathFlags::AllowContract); } + void setHasApproxFunc(bool B) { + SubclassOptionalData = + (SubclassOptionalData & ~FastMathFlags::ApproxFunc) | + (B * FastMathFlags::ApproxFunc); + } + /// Convenience function for setting multiple fast-math flags. /// FMF is a mask of the bits to set. void setFastMathFlags(FastMathFlags FMF) { @@ -278,42 +296,53 @@ private: } public: - /// Test whether this operation is permitted to be - /// algebraically transformed, aka the 'A' fast-math property. - bool hasUnsafeAlgebra() const { - return (SubclassOptionalData & FastMathFlags::UnsafeAlgebra) != 0; + /// Test if this operation allows all non-strict floating-point transforms. 
+ bool isFast() const { + return ((SubclassOptionalData & FastMathFlags::AllowReassoc) != 0 && + (SubclassOptionalData & FastMathFlags::NoNaNs) != 0 && + (SubclassOptionalData & FastMathFlags::NoInfs) != 0 && + (SubclassOptionalData & FastMathFlags::NoSignedZeros) != 0 && + (SubclassOptionalData & FastMathFlags::AllowReciprocal) != 0 && + (SubclassOptionalData & FastMathFlags::AllowContract) != 0 && + (SubclassOptionalData & FastMathFlags::ApproxFunc) != 0); + } + + /// Test if this operation may be simplified with reassociative transforms. + bool hasAllowReassoc() const { + return (SubclassOptionalData & FastMathFlags::AllowReassoc) != 0; } - /// Test whether this operation's arguments and results are to be - /// treated as non-NaN, aka the 'N' fast-math property. + /// Test if this operation's arguments and results are assumed not-NaN. bool hasNoNaNs() const { return (SubclassOptionalData & FastMathFlags::NoNaNs) != 0; } - /// Test whether this operation's arguments and results are to be - /// treated as NoN-Inf, aka the 'I' fast-math property. + /// Test if this operation's arguments and results are assumed not-infinite. bool hasNoInfs() const { return (SubclassOptionalData & FastMathFlags::NoInfs) != 0; } - /// Test whether this operation can treat the sign of zero - /// as insignificant, aka the 'S' fast-math property. + /// Test if this operation can ignore the sign of zero. bool hasNoSignedZeros() const { return (SubclassOptionalData & FastMathFlags::NoSignedZeros) != 0; } - /// Test whether this operation is permitted to use - /// reciprocal instead of division, aka the 'R' fast-math property. + /// Test if this operation can use reciprocal multiply instead of division. bool hasAllowReciprocal() const { return (SubclassOptionalData & FastMathFlags::AllowReciprocal) != 0; } - /// Test whether this operation is permitted to - /// be floating-point contracted. + /// Test if this operation can be floating-point contracted (FMA). 
bool hasAllowContract() const { return (SubclassOptionalData & FastMathFlags::AllowContract) != 0; } + /// Test if this operation allows approximations of math library functions or + /// intrinsics. + bool hasApproxFunc() const { + return (SubclassOptionalData & FastMathFlags::ApproxFunc) != 0; + } + /// Convenience function for getting all the fast-math flags FastMathFlags getFastMathFlags() const { return FastMathFlags(SubclassOptionalData); @@ -472,6 +501,12 @@ public: return true; } + unsigned countNonConstantIndices() const { + return count_if(make_range(idx_begin(), idx_end()), [](const Use& use) { + return !isa<ConstantInt>(*use); + }); + } + /// \brief Accumulate the constant address offset of this GEP if possible. /// /// This routine accepts an APInt into which it will accumulate the constant diff --git a/include/llvm/IR/OptBisect.h b/include/llvm/IR/OptBisect.h index 185a5ac956f5..09e67aa79246 100644 --- a/include/llvm/IR/OptBisect.h +++ b/include/llvm/IR/OptBisect.h @@ -1,4 +1,4 @@ -//===----------- llvm/IR/OptBisect.h - LLVM Bisect support -------------===// +//===- llvm/IR/OptBisect.h - LLVM Bisect support ----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,11 +15,11 @@ #ifndef LLVM_IR_OPTBISECT_H #define LLVM_IR_OPTBISECT_H +#include "llvm/ADT/StringRef.h" + namespace llvm { class Pass; -class StringRef; -class Twine; /// This class implements a mechanism to disable passes and individual /// optimizations at compile time based on a command line option diff --git a/include/llvm/IR/PassManager.h b/include/llvm/IR/PassManager.h index 393175675034..4f838a719512 100644 --- a/include/llvm/IR/PassManager.h +++ b/include/llvm/IR/PassManager.h @@ -470,7 +470,7 @@ public: //IR.getContext().yield(); } - // Invaliadtion was handled after each pass in the above loop for the + // Invalidation was handled after each pass in the above loop for the // current unit of IR. 
Therefore, the remaining analysis results in the // AnalysisManager are preserved. We mark this with a set so that we don't // need to inspect each one individually. @@ -654,9 +654,9 @@ public: /// This doesn't invalidate, but instead simply deletes, the relevant results. /// It is useful when the IR is being removed and we want to clear out all the /// memory pinned for it. - void clear(IRUnitT &IR) { + void clear(IRUnitT &IR, llvm::StringRef Name) { if (DebugLogging) - dbgs() << "Clearing all analysis results for: " << IR.getName() << "\n"; + dbgs() << "Clearing all analysis results for: " << Name << "\n"; auto ResultsListI = AnalysisResultLists.find(&IR); if (ResultsListI == AnalysisResultLists.end()) diff --git a/include/llvm/IR/PatternMatch.h b/include/llvm/IR/PatternMatch.h index acb895211644..245d72fbd16e 100644 --- a/include/llvm/IR/PatternMatch.h +++ b/include/llvm/IR/PatternMatch.h @@ -234,11 +234,35 @@ struct apint_match { return false; } }; +// Either constexpr if or renaming ConstantFP::getValueAPF to +// ConstantFP::getValue is needed to do it via single template +// function for both apint/apfloat. +struct apfloat_match { + const APFloat *&Res; + apfloat_match(const APFloat *&R) : Res(R) {} + template <typename ITy> bool match(ITy *V) { + if (auto *CI = dyn_cast<ConstantFP>(V)) { + Res = &CI->getValueAPF(); + return true; + } + if (V->getType()->isVectorTy()) + if (const auto *C = dyn_cast<Constant>(V)) + if (auto *CI = dyn_cast_or_null<ConstantFP>(C->getSplatValue())) { + Res = &CI->getValueAPF(); + return true; + } + return false; + } +}; /// \brief Match a ConstantInt or splatted ConstantVector, binding the /// specified pointer to the contained APInt. inline apint_match m_APInt(const APInt *&Res) { return Res; } +/// \brief Match a ConstantFP or splatted ConstantVector, binding the +/// specified pointer to the contained APFloat. 
+inline apfloat_match m_APFloat(const APFloat *&Res) { return Res; } + template <int64_t Val> struct constantint_match { template <typename ITy> bool match(ITy *V) { if (const auto *CI = dyn_cast<ConstantInt>(V)) { @@ -933,6 +957,26 @@ inline CastClass_match<OpTy, Instruction::FPExt> m_FPExt(const OpTy &Op) { } //===----------------------------------------------------------------------===// +// Matcher for LoadInst classes +// + +template <typename Op_t> struct LoadClass_match { + Op_t Op; + + LoadClass_match(const Op_t &OpMatch) : Op(OpMatch) {} + + template <typename OpTy> bool match(OpTy *V) { + if (auto *LI = dyn_cast<LoadInst>(V)) + return Op.match(LI->getPointerOperand()); + return false; + } +}; + +/// Matches LoadInst. +template <typename OpTy> inline LoadClass_match<OpTy> m_Load(const OpTy &Op) { + return LoadClass_match<OpTy>(Op); +} +//===----------------------------------------------------------------------===// // Matchers for unary operators // diff --git a/include/llvm/IR/Type.h b/include/llvm/IR/Type.h index ef7801266777..1574fc334ffc 100644 --- a/include/llvm/IR/Type.h +++ b/include/llvm/IR/Type.h @@ -438,7 +438,7 @@ private: }; // Printing of types. -static inline raw_ostream &operator<<(raw_ostream &OS, const Type &T) { +inline raw_ostream &operator<<(raw_ostream &OS, const Type &T) { T.print(OS); return OS; } diff --git a/include/llvm/IR/Value.def b/include/llvm/IR/Value.def index cebd7f7297ef..e2ddba0aa159 100644 --- a/include/llvm/IR/Value.def +++ b/include/llvm/IR/Value.def @@ -56,16 +56,10 @@ #define HANDLE_CONSTANT_MARKER(MarkerName, ValueName) #endif -HANDLE_VALUE(Argument) -HANDLE_VALUE(BasicBlock) - -// FIXME: It's awkward that Value.def knows about classes in Analysis. While -// this doesn't introduce a strict link or include dependency, we should remove -// the circular dependency eventually. 
-HANDLE_MEMORY_VALUE(MemoryUse) -HANDLE_MEMORY_VALUE(MemoryDef) -HANDLE_MEMORY_VALUE(MemoryPhi) +// Having constant first makes the range check for isa<Constant> faster +// and smaller by one operation. +// Constant HANDLE_GLOBAL_VALUE(Function) HANDLE_GLOBAL_VALUE(GlobalAlias) HANDLE_GLOBAL_VALUE(GlobalIFunc) @@ -88,13 +82,6 @@ HANDLE_CONSTANT(ConstantFP) HANDLE_CONSTANT(ConstantPointerNull) HANDLE_CONSTANT(ConstantTokenNone) -HANDLE_METADATA_VALUE(MetadataAsValue) -HANDLE_INLINE_ASM_VALUE(InlineAsm) - -HANDLE_INSTRUCTION(Instruction) -// Enum values starting at InstructionVal are used for Instructions; -// don't add new values here! - HANDLE_CONSTANT_MARKER(ConstantFirstVal, Function) HANDLE_CONSTANT_MARKER(ConstantLastVal, ConstantTokenNone) HANDLE_CONSTANT_MARKER(ConstantDataFirstVal, UndefValue) @@ -102,6 +89,24 @@ HANDLE_CONSTANT_MARKER(ConstantDataLastVal, ConstantTokenNone) HANDLE_CONSTANT_MARKER(ConstantAggregateFirstVal, ConstantArray) HANDLE_CONSTANT_MARKER(ConstantAggregateLastVal, ConstantVector) +HANDLE_VALUE(Argument) +HANDLE_VALUE(BasicBlock) + + +HANDLE_METADATA_VALUE(MetadataAsValue) +HANDLE_INLINE_ASM_VALUE(InlineAsm) + +// FIXME: It's awkward that Value.def knows about classes in Analysis. While +// this doesn't introduce a strict link or include dependency, we should remove +// the circular dependency eventually. +HANDLE_MEMORY_VALUE(MemoryUse) +HANDLE_MEMORY_VALUE(MemoryDef) +HANDLE_MEMORY_VALUE(MemoryPhi) + +HANDLE_INSTRUCTION(Instruction) +// Enum values starting at InstructionVal are used for Instructions; +// don't add new values here! + #undef HANDLE_MEMORY_VALUE #undef HANDLE_GLOBAL_VALUE #undef HANDLE_CONSTANT diff --git a/include/llvm/IR/Value.h b/include/llvm/IR/Value.h index 9e4914973edf..d848fe921868 100644 --- a/include/llvm/IR/Value.h +++ b/include/llvm/IR/Value.h @@ -299,6 +299,12 @@ public: /// values or constant users. 
void replaceUsesOutsideBlock(Value *V, BasicBlock *BB); + /// replaceUsesExceptBlockAddr - Go through the uses list for this definition + /// and make each use point to "V" instead of "this" when the use is outside + /// the block. 'This's use list is expected to have at least one element. + /// Unlike replaceAllUsesWith this function skips blockaddr uses. + void replaceUsesExceptBlockAddr(Value *New); + //---------------------------------------------------------------------- // Methods for handling the chain of uses of this Value. // @@ -324,6 +330,10 @@ public: return UseList == nullptr; } + bool materialized_use_empty() const { + return UseList == nullptr; + } + using use_iterator = use_iterator_impl<Use>; using const_use_iterator = use_iterator_impl<const Use>; @@ -560,7 +570,7 @@ public: /// /// If CanBeNull is set by this function the pointer can either be null or be /// dereferenceable up to the returned number of bytes. - unsigned getPointerDereferenceableBytes(const DataLayout &DL, + uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull) const; /// \brief Returns an alignment of the pointer value. @@ -645,12 +655,6 @@ private: return Merged; } - /// \brief Tail-recursive helper for \a mergeUseLists(). - /// - /// \param[out] Next the first element in the list. 
- template <class Compare> - static void mergeUseListsImpl(Use *L, Use *R, Use **Next, Compare Cmp); - protected: unsigned short getSubclassDataFromValue() const { return SubclassData; } void setValueSubclassData(unsigned short D) { SubclassData = D; } @@ -756,8 +760,8 @@ template <class Compare> void Value::sortUseList(Compare Cmp) { // template <> struct isa_impl<Constant, Value> { static inline bool doit(const Value &Val) { - return Val.getValueID() >= Value::ConstantFirstVal && - Val.getValueID() <= Value::ConstantLastVal; + static_assert(Value::ConstantFirstVal == 0, "Val.getValueID() >= Value::ConstantFirstVal"); + return Val.getValueID() <= Value::ConstantLastVal; } }; diff --git a/include/llvm/IR/Verifier.h b/include/llvm/IR/Verifier.h index 15e52d9e0742..bc10f330bc8a 100644 --- a/include/llvm/IR/Verifier.h +++ b/include/llvm/IR/Verifier.h @@ -61,11 +61,13 @@ class TBAAVerifier { /// \name Helper functions used by \c visitTBAAMetadata. /// @{ MDNode *getFieldNodeFromTBAABaseNode(Instruction &I, const MDNode *BaseNode, - APInt &Offset); + APInt &Offset, bool IsNewFormat); TBAAVerifier::TBAABaseNodeSummary verifyTBAABaseNode(Instruction &I, - const MDNode *BaseNode); + const MDNode *BaseNode, + bool IsNewFormat); TBAABaseNodeSummary verifyTBAABaseNodeImpl(Instruction &I, - const MDNode *BaseNode); + const MDNode *BaseNode, + bool IsNewFormat); bool isValidScalarTBAANode(const MDNode *MD); /// @} |
