From f0c0337bbfb63d1f9edf145aab535bdf82c20454 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Wed, 3 May 2017 20:26:23 +0000 Subject: Vendor import of clang trunk r302069: https://llvm.org/svn/llvm-project/cfe/trunk@302069 --- docs/UndefinedBehaviorSanitizer.rst | 2 - include/clang/Basic/Diagnostic.h | 75 +- include/clang/Basic/DiagnosticASTKinds.td | 4 +- include/clang/Basic/DiagnosticIDs.h | 22 +- include/clang/Basic/DiagnosticSemaKinds.td | 5 +- .../clang/Basic/DiagnosticSerializationKinds.td | 10 +- include/clang/Sema/Sema.h | 4 +- .../Core/BugReporter/CommonBugCategories.h | 1 + lib/AST/ExprConstant.cpp | 242 ++--- lib/AST/ODRHash.cpp | 82 ++ lib/Basic/Diagnostic.cpp | 12 +- lib/Basic/DiagnosticIDs.cpp | 22 +- lib/CodeGen/CGCall.cpp | 4 +- lib/CodeGen/CGExprScalar.cpp | 86 +- lib/CodeGen/CGObjCGNU.cpp | 2 +- lib/CodeGen/CodeGenModule.cpp | 13 +- lib/CodeGen/TargetInfo.cpp | 10 +- lib/Driver/SanitizerArgs.cpp | 12 +- lib/Parse/ParseOpenMP.cpp | 24 + lib/Sema/SemaChecking.cpp | 167 ++- lib/Sema/SemaDecl.cpp | 3 +- lib/Sema/SemaExpr.cpp | 2 +- lib/Sema/SemaExprCXX.cpp | 53 +- lib/Serialization/ASTReader.cpp | 133 ++- lib/Serialization/ASTWriter.cpp | 21 +- lib/StaticAnalyzer/Checkers/MallocChecker.cpp | 33 +- lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp | 2 +- lib/StaticAnalyzer/Checkers/ValistChecker.cpp | 5 +- lib/StaticAnalyzer/Core/CommonBugCategories.cpp | 1 + test/Analysis/MismatchedDeallocator-path-notes.cpp | 2 +- test/Analysis/NewDelete-path-notes.cpp | 4 +- .../diagnostics/report-issues-within-main-file.cpp | 2 +- test/Analysis/edges-new.mm | 4 +- test/Analysis/malloc-plist.c | 36 +- test/Analysis/plist-macros.cpp | 4 +- test/CodeGen/PR32874.c | 61 ++ test/CodeGen/aarch64-neon-2velem.c | 552 +++++----- test/CodeGen/aarch64-neon-intrinsics.c | 1134 ++++++++++---------- test/CodeGen/aarch64-neon-misc.c | 330 +++--- test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c | 42 +- test/CodeGen/arm_neon_intrinsics.c | 1082 +++++++++---------- test/CodeGen/libcalls.c | 4 +- test/Driver/arch-specific-libdir-rpath.c | 2 +- test/Driver/fsanitize-coverage.c | 26 +- test/Index/keep-going.cpp | 4 +- test/Modules/diag-flags.cpp | 38 +- test/Modules/odr_hash.cpp | 208 +++- test/OpenMP/target_ast_print.cpp | 63 +- test/OpenMP/target_map_messages.cpp | 2 + test/Sema/varargs.c | 2 +- test/SemaCXX/constexpr-array-unknown-bound.cpp | 25 - test/SemaCXX/cxx1z-lambda-star-this.cpp | 301 ++++-- test/SemaCXX/warn-thread-safety-parsing.cpp | 14 +- test/SemaObjCXX/arc-overloading.mm | 2 +- tools/libclang/CIndex.cpp | 2 +- unittests/Basic/DiagnosticTest.cpp | 37 +- 56 files changed, 2836 insertions(+), 2199 deletions(-) create mode 100644 test/CodeGen/PR32874.c delete mode 100644 test/SemaCXX/constexpr-array-unknown-bound.cpp diff --git a/docs/UndefinedBehaviorSanitizer.rst b/docs/UndefinedBehaviorSanitizer.rst index 9bec5506359ff..d6fdad2a0c011 100644 --- a/docs/UndefinedBehaviorSanitizer.rst +++ b/docs/UndefinedBehaviorSanitizer.rst @@ -157,8 +157,6 @@ will need to: ``UBSAN_OPTIONS=print_stacktrace=1``. #. Make sure ``llvm-symbolizer`` binary is in ``PATH``. -Stacktrace printing for UBSan issues is currently not supported on Darwin. 
- Issue Suppression ================= diff --git a/include/clang/Basic/Diagnostic.h b/include/clang/Basic/Diagnostic.h index a8e11bcb89278..22cded21c12df 100644 --- a/include/clang/Basic/Diagnostic.h +++ b/include/clang/Basic/Diagnostic.h @@ -178,12 +178,7 @@ public: private: unsigned char AllExtensionsSilenced; // Used by __extension__ - bool IgnoreAllWarnings; // Ignore all warnings: -w - bool WarningsAsErrors; // Treat warnings like errors. - bool EnableAllWarnings; // Enable all warnings. - bool ErrorsAsFatal; // Treat errors like fatal errors. - bool FatalsAsError; // Treat fatal errors like errors. - bool SuppressSystemWarnings; // Suppress warnings in system headers. + bool SuppressAfterFatalError; // Suppress diagnostics after a fatal error? bool SuppressAllDiagnostics; // Suppress all diagnostics. bool ElideType; // Elide common types of templates. bool PrintTemplateTree; // Print a tree when comparing templates. @@ -194,7 +189,6 @@ private: // 0 -> no limit. unsigned ConstexprBacktraceLimit; // Cap on depth of constexpr evaluation // backtrace stack, 0 -> no limit. - diag::Severity ExtBehavior; // Map extensions to warnings or errors? IntrusiveRefCntPtr Diags; IntrusiveRefCntPtr DiagOpts; DiagnosticConsumer *Client; @@ -216,6 +210,19 @@ private: llvm::DenseMap DiagMap; public: + // "Global" configuration state that can actually vary between modules. + unsigned IgnoreAllWarnings : 1; // Ignore all warnings: -w + unsigned EnableAllWarnings : 1; // Enable all warnings. + unsigned WarningsAsErrors : 1; // Treat warnings like errors. + unsigned ErrorsAsFatal : 1; // Treat errors like fatal errors. + unsigned SuppressSystemWarnings : 1; // Suppress warnings in system headers. + diag::Severity ExtBehavior; // Map extensions to warnings or errors? + + DiagState() + : IgnoreAllWarnings(false), EnableAllWarnings(false), + WarningsAsErrors(false), ErrorsAsFatal(false), + SuppressSystemWarnings(false), ExtBehavior(diag::Severity::Ignored) {} + typedef llvm::DenseMap::iterator iterator; typedef llvm::DenseMap::const_iterator const_iterator; @@ -493,33 +500,47 @@ public: /// \brief When set to true, any unmapped warnings are ignored. /// /// If this and WarningsAsErrors are both set, then this one wins. - void setIgnoreAllWarnings(bool Val) { IgnoreAllWarnings = Val; } - bool getIgnoreAllWarnings() const { return IgnoreAllWarnings; } + void setIgnoreAllWarnings(bool Val) { + GetCurDiagState()->IgnoreAllWarnings = Val; + } + bool getIgnoreAllWarnings() const { + return GetCurDiagState()->IgnoreAllWarnings; + } /// \brief When set to true, any unmapped ignored warnings are no longer /// ignored. /// /// If this and IgnoreAllWarnings are both set, then that one wins. - void setEnableAllWarnings(bool Val) { EnableAllWarnings = Val; } - bool getEnableAllWarnings() const { return EnableAllWarnings; } + void setEnableAllWarnings(bool Val) { + GetCurDiagState()->EnableAllWarnings = Val; + } + bool getEnableAllWarnings() const { + return GetCurDiagState()->EnableAllWarnings; + } /// \brief When set to true, any warnings reported are issued as errors. - void setWarningsAsErrors(bool Val) { WarningsAsErrors = Val; } - bool getWarningsAsErrors() const { return WarningsAsErrors; } + void setWarningsAsErrors(bool Val) { + GetCurDiagState()->WarningsAsErrors = Val; + } + bool getWarningsAsErrors() const { + return GetCurDiagState()->WarningsAsErrors; + } /// \brief When set to true, any error reported is made a fatal error. 
- void setErrorsAsFatal(bool Val) { ErrorsAsFatal = Val; } - bool getErrorsAsFatal() const { return ErrorsAsFatal; } + void setErrorsAsFatal(bool Val) { GetCurDiagState()->ErrorsAsFatal = Val; } + bool getErrorsAsFatal() const { return GetCurDiagState()->ErrorsAsFatal; } - /// \brief When set to true, any fatal error reported is made an error. - /// - /// This setting takes precedence over the setErrorsAsFatal setting above. - void setFatalsAsError(bool Val) { FatalsAsError = Val; } - bool getFatalsAsError() const { return FatalsAsError; } + /// \brief When set to true (the default), suppress further diagnostics after + /// a fatal error. + void setSuppressAfterFatalError(bool Val) { SuppressAfterFatalError = Val; } /// \brief When set to true mask warnings that come from system headers. - void setSuppressSystemWarnings(bool Val) { SuppressSystemWarnings = Val; } - bool getSuppressSystemWarnings() const { return SuppressSystemWarnings; } + void setSuppressSystemWarnings(bool Val) { + GetCurDiagState()->SuppressSystemWarnings = Val; + } + bool getSuppressSystemWarnings() const { + return GetCurDiagState()->SuppressSystemWarnings; + } /// \brief Suppress all diagnostics, to silence the front end when we /// know that we don't want any more diagnostics to be passed along to the @@ -571,11 +592,15 @@ public: } /// \brief Controls whether otherwise-unmapped extension diagnostics are - /// mapped onto ignore/warning/error. + /// mapped onto ignore/warning/error. /// /// This corresponds to the GCC -pedantic and -pedantic-errors option. - void setExtensionHandlingBehavior(diag::Severity H) { ExtBehavior = H; } - diag::Severity getExtensionHandlingBehavior() const { return ExtBehavior; } + void setExtensionHandlingBehavior(diag::Severity H) { + GetCurDiagState()->ExtBehavior = H; + } + diag::Severity getExtensionHandlingBehavior() const { + return GetCurDiagState()->ExtBehavior; + } /// \brief Counter bumped when an __extension__ block is/ encountered. /// diff --git a/include/clang/Basic/DiagnosticASTKinds.td b/include/clang/Basic/DiagnosticASTKinds.td index a07dbf631152b..652d06278557c 100644 --- a/include/clang/Basic/DiagnosticASTKinds.td +++ b/include/clang/Basic/DiagnosticASTKinds.td @@ -154,14 +154,12 @@ def note_constexpr_baa_insufficient_alignment : Note< def note_constexpr_baa_value_insufficient_alignment : Note< "value of the aligned pointer (%0) is not a multiple of the asserted %1 " "%plural{1:byte|:bytes}1">; -def note_constexpr_array_unknown_bound_arithmetic : Note< - "cannot perform pointer arithmetic on pointer to array without constant bound">; def warn_integer_constant_overflow : Warning< "overflow in expression; result is %0 with type %1">, InGroup>; -// This is a temporary diagnostic, and shall be removed once our +// This is a temporary diagnostic, and shall be removed once our // implementation is complete, and like the preceding constexpr notes belongs // in Sema. def note_unimplemented_constexpr_lambda_feature_ast : Note< diff --git a/include/clang/Basic/DiagnosticIDs.h b/include/clang/Basic/DiagnosticIDs.h index f5f70cb5e7d3c..7646e33d2366e 100644 --- a/include/clang/Basic/DiagnosticIDs.h +++ b/include/clang/Basic/DiagnosticIDs.h @@ -122,15 +122,21 @@ public: bool wasUpgradedFromWarning() const { return WasUpgradedFromWarning; } void setUpgradedFromWarning(bool Value) { WasUpgradedFromWarning = Value; } - /// Serialize the bits that aren't based on context. 
- unsigned serializeBits() const { - return (WasUpgradedFromWarning << 3) | Severity; + /// Serialize this mapping as a raw integer. + unsigned serialize() const { + return (IsUser << 7) | (IsPragma << 6) | (HasNoWarningAsError << 5) | + (HasNoErrorAsFatal << 4) | (WasUpgradedFromWarning << 3) | Severity; } - static diag::Severity deserializeSeverity(unsigned Bits) { - return (diag::Severity)(Bits & 0x7); - } - static bool deserializeUpgradedFromWarning(unsigned Bits) { - return Bits >> 3; + /// Deserialize a mapping. + static DiagnosticMapping deserialize(unsigned Bits) { + DiagnosticMapping Result; + Result.IsUser = (Bits >> 7) & 1; + Result.IsPragma = (Bits >> 6) & 1; + Result.HasNoWarningAsError = (Bits >> 5) & 1; + Result.HasNoErrorAsFatal = (Bits >> 4) & 1; + Result.WasUpgradedFromWarning = (Bits >> 3) & 1; + Result.Severity = Bits & 0x7; + return Result; } }; diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td index d62ab098364d1..6a3a2124a5ffc 100644 --- a/include/clang/Basic/DiagnosticSemaKinds.td +++ b/include/clang/Basic/DiagnosticSemaKinds.td @@ -2820,9 +2820,6 @@ def warn_cconv_structors : Warning< def err_regparm_mismatch : Error<"function declared with regparm(%0) " "attribute was previously declared " "%plural{0:without the regparm|:with the regparm(%1)}1 attribute">; -def err_returns_retained_mismatch : Error< - "function declared with the ns_returns_retained attribute " - "was previously declared without the ns_returns_retained attribute">; def err_function_attribute_mismatch : Error< "function declared with %0 attribute " "was previously declared without the %0 attribute">; @@ -8048,7 +8045,7 @@ def err_64_bit_builtin_32_bit_tgt : Error< "this builtin is only available on 64-bit targets">; def err_ppc_builtin_only_on_pwr7 : Error< "this builtin is only valid on POWER7 or later CPUs">; -def err_x86_builtin_32_bit_tgt : Error< +def err_x86_builtin_64_only : Error< "this builtin is only available on x86-64 targets">; def err_x86_builtin_invalid_rounding : Error< "invalid rounding argument">; diff --git a/include/clang/Basic/DiagnosticSerializationKinds.td b/include/clang/Basic/DiagnosticSerializationKinds.td index 4af4c18ced333..35e2f67e24b6d 100644 --- a/include/clang/Basic/DiagnosticSerializationKinds.td +++ b/include/clang/Basic/DiagnosticSerializationKinds.td @@ -146,7 +146,10 @@ def err_module_odr_violation_mismatch_decl_diff : Error< "method %4 is %select{not static|static}5|" "method %4 is %select{not volatile|volatile}5|" "method %4 is %select{not const|const}5|" - "method %4 is %select{not inline|inline}5}3">; + "method %4 is %select{not inline|inline}5|" + "method %4 that has %5 parameter%s5|" + "method %4 with %ordinal5 parameter of type %6%select{| decayed from %8}7|" + "method %4 with %ordinal5 parameter named %6}3">; def note_module_odr_violation_mismatch_decl_diff : Note<"but in '%0' found " "%select{" @@ -166,7 +169,10 @@ def note_module_odr_violation_mismatch_decl_diff : Note<"but in '%0' found " "method %2 is %select{not static|static}3|" "method %2 is %select{not volatile|volatile}3|" "method %2 is %select{not const|const}3|" - "method %2 is %select{not inline|inline}3}1">; + "method %2 is %select{not inline|inline}3|" + "method %2 that has %3 parameter%s3|" + "method %2 with %ordinal3 parameter of type %4%select{| decayed from %6}5|" + "method %2 with %ordinal3 parameter named %4}1">; def warn_module_uses_date_time : Warning< "%select{precompiled header|module}0 uses __DATE__ or __TIME__">, diff --git 
a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h index e24d6436f3313..eca383bee2f57 100644 --- a/include/clang/Sema/Sema.h +++ b/include/clang/Sema/Sema.h @@ -10068,9 +10068,7 @@ private: bool CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); bool CheckPPCBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); - bool SemaBuiltinVAStartImpl(CallExpr *TheCall); - bool SemaBuiltinVAStart(CallExpr *TheCall); - bool SemaBuiltinMSVAStart(CallExpr *TheCall); + bool SemaBuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall); bool SemaBuiltinVAStartARM(CallExpr *Call); bool SemaBuiltinUnorderedCompare(CallExpr *TheCall); bool SemaBuiltinFPClassification(CallExpr *TheCall, unsigned NumArgs); diff --git a/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h b/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h index 8df2bc331b512..0e80e7bc19ba4 100644 --- a/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h +++ b/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h @@ -17,6 +17,7 @@ namespace clang { extern const char * const CoreFoundationObjectiveC; extern const char * const LogicError; extern const char * const MemoryCoreFoundationObjectiveC; + extern const char * const MemoryError; extern const char * const UnixAPI; } } diff --git a/lib/AST/ExprConstant.cpp b/lib/AST/ExprConstant.cpp index 4d0805323e568..75bb0cac51b83 100644 --- a/lib/AST/ExprConstant.cpp +++ b/lib/AST/ExprConstant.cpp @@ -148,8 +148,7 @@ namespace { static unsigned findMostDerivedSubobject(ASTContext &Ctx, APValue::LValueBase Base, ArrayRef Path, - uint64_t &ArraySize, QualType &Type, bool &IsArray, - bool &IsUnsizedArray) { + uint64_t &ArraySize, QualType &Type, bool &IsArray) { // This only accepts LValueBases from APValues, and APValues don't support // arrays that lack size info. assert(!isBaseAnAllocSizeCall(Base) && @@ -158,34 +157,28 @@ namespace { Type = getType(Base); for (unsigned I = 0, N = Path.size(); I != N; ++I) { - if (auto AT = Ctx.getAsArrayType(Type)) { + if (Type->isArrayType()) { + const ConstantArrayType *CAT = + cast(Ctx.getAsArrayType(Type)); + Type = CAT->getElementType(); + ArraySize = CAT->getSize().getZExtValue(); MostDerivedLength = I + 1; IsArray = true; - if (auto CAT = Ctx.getAsConstantArrayType(Type)) - ArraySize = CAT->getSize().getZExtValue(); - else { - ArraySize = 0; - IsUnsizedArray = true; - } - Type = AT->getElementType(); } else if (Type->isAnyComplexType()) { const ComplexType *CT = Type->castAs(); Type = CT->getElementType(); ArraySize = 2; MostDerivedLength = I + 1; IsArray = true; - IsUnsizedArray = false; } else if (const FieldDecl *FD = getAsField(Path[I])) { Type = FD->getType(); ArraySize = 0; MostDerivedLength = I + 1; IsArray = false; - IsUnsizedArray = false; } else { // Path[I] describes a base class. ArraySize = 0; IsArray = false; - IsUnsizedArray = false; } } return MostDerivedLength; @@ -207,9 +200,8 @@ namespace { /// Is this a pointer one past the end of an object? unsigned IsOnePastTheEnd : 1; - /// Indicator of whether the most-derived object is an unsized array (e.g. - /// of unknown bound). - unsigned MostDerivedIsAnUnsizedArray : 1; + /// Indicator of whether the first entry is an unsized array. + unsigned FirstEntryIsAnUnsizedArray : 1; /// Indicator of whether the most-derived object is an array element. 
unsigned MostDerivedIsArrayElement : 1; @@ -239,28 +231,25 @@ namespace { explicit SubobjectDesignator(QualType T) : Invalid(false), IsOnePastTheEnd(false), - MostDerivedIsAnUnsizedArray(false), MostDerivedIsArrayElement(false), + FirstEntryIsAnUnsizedArray(false), MostDerivedIsArrayElement(false), MostDerivedPathLength(0), MostDerivedArraySize(0), MostDerivedType(T) {} SubobjectDesignator(ASTContext &Ctx, const APValue &V) : Invalid(!V.isLValue() || !V.hasLValuePath()), IsOnePastTheEnd(false), - MostDerivedIsAnUnsizedArray(false), MostDerivedIsArrayElement(false), + FirstEntryIsAnUnsizedArray(false), MostDerivedIsArrayElement(false), MostDerivedPathLength(0), MostDerivedArraySize(0) { assert(V.isLValue() && "Non-LValue used to make an LValue designator?"); if (!Invalid) { IsOnePastTheEnd = V.isLValueOnePastTheEnd(); ArrayRef VEntries = V.getLValuePath(); Entries.insert(Entries.end(), VEntries.begin(), VEntries.end()); - if (auto Base = V.getLValueBase()) { - if (auto Decl = Base.dyn_cast()) - Base = cast(Decl->getMostRecentDecl()); - bool IsArray = false, IsUnsizedArray = false; + if (V.getLValueBase()) { + bool IsArray = false; MostDerivedPathLength = findMostDerivedSubobject( - Ctx, Base, V.getLValuePath(), MostDerivedArraySize, - MostDerivedType, IsArray, IsUnsizedArray); - MostDerivedIsArrayElement = IsArray; - MostDerivedIsAnUnsizedArray = IsUnsizedArray; + Ctx, V.getLValueBase(), V.getLValuePath(), MostDerivedArraySize, + MostDerivedType, IsArray); + MostDerivedIsArrayElement = IsArray; } } } @@ -274,7 +263,7 @@ namespace { /// known bound. bool isMostDerivedAnUnsizedArray() const { assert(!Invalid && "Calling this makes no sense on invalid designators"); - return MostDerivedIsAnUnsizedArray; + return Entries.size() == 1 && FirstEntryIsAnUnsizedArray; } /// Determine what the most derived array's size is. Results in an assertion @@ -314,7 +303,6 @@ namespace { // This is a most-derived object. MostDerivedType = CAT->getElementType(); MostDerivedIsArrayElement = true; - MostDerivedIsAnUnsizedArray = false; MostDerivedArraySize = CAT->getSize().getZExtValue(); MostDerivedPathLength = Entries.size(); } @@ -327,7 +315,6 @@ namespace { MostDerivedType = ElemTy; MostDerivedIsArrayElement = true; - MostDerivedIsAnUnsizedArray = true; // The value in MostDerivedArraySize is undefined in this case. So, set it // to an arbitrary value that's likely to loudly break things if it's // used. @@ -346,7 +333,6 @@ namespace { if (const FieldDecl *FD = dyn_cast(D)) { MostDerivedType = FD->getType(); MostDerivedIsArrayElement = false; - MostDerivedIsAnUnsizedArray = false; MostDerivedArraySize = 0; MostDerivedPathLength = Entries.size(); } @@ -361,14 +347,53 @@ namespace { // is unlikely to matter. MostDerivedType = EltTy; MostDerivedIsArrayElement = true; - MostDerivedIsAnUnsizedArray = false; MostDerivedArraySize = 2; MostDerivedPathLength = Entries.size(); } void diagnosePointerArithmetic(EvalInfo &Info, const Expr *E, const APSInt &N); /// Add N to the address of this subobject. - void adjustIndex(EvalInfo &Info, const Expr *E, APSInt N); + void adjustIndex(EvalInfo &Info, const Expr *E, APSInt N) { + if (Invalid || !N) return; + uint64_t TruncatedN = N.extOrTrunc(64).getZExtValue(); + if (isMostDerivedAnUnsizedArray()) { + // Can't verify -- trust that the user is doing the right thing (or if + // not, trust that the caller will catch the bad behavior). + // FIXME: Should we reject if this overflows, at least? 
+ Entries.back().ArrayIndex += TruncatedN; + return; + } + + // [expr.add]p4: For the purposes of these operators, a pointer to a + // nonarray object behaves the same as a pointer to the first element of + // an array of length one with the type of the object as its element type. + bool IsArray = MostDerivedPathLength == Entries.size() && + MostDerivedIsArrayElement; + uint64_t ArrayIndex = + IsArray ? Entries.back().ArrayIndex : (uint64_t)IsOnePastTheEnd; + uint64_t ArraySize = + IsArray ? getMostDerivedArraySize() : (uint64_t)1; + + if (N < -(int64_t)ArrayIndex || N > ArraySize - ArrayIndex) { + // Calculate the actual index in a wide enough type, so we can include + // it in the note. + N = N.extend(std::max(N.getBitWidth() + 1, 65)); + (llvm::APInt&)N += ArrayIndex; + assert(N.ugt(ArraySize) && "bounds check failed for in-bounds index"); + diagnosePointerArithmetic(Info, E, N); + setInvalid(); + return; + } + + ArrayIndex += TruncatedN; + assert(ArrayIndex <= ArraySize && + "bounds check succeeded for out-of-bounds index"); + + if (IsArray) + Entries.back().ArrayIndex = ArrayIndex; + else + IsOnePastTheEnd = (ArrayIndex != 0); + } }; /// A stack frame in the constexpr call stack. @@ -470,7 +495,7 @@ namespace { // FIXME: Force the precision of the source value down so we don't // print digits which are usually useless (we don't really care here if // we truncate a digit by accident in edge cases). Ideally, - // APFloat::toString would automatically print the shortest + // APFloat::toString would automatically print the shortest // representation which rounds to the correct value, but it's a bit // tricky to implement. unsigned precision = @@ -695,7 +720,7 @@ namespace { private: OptionalDiagnostic Diag(SourceLocation Loc, diag::kind DiagId, unsigned ExtraNotes, bool IsCCEDiag) { - + if (EvalStatus.Diag) { // If we have a prior diagnostic, it will be noting that the expression // isn't a constant expression. This diagnostic is more important, @@ -748,7 +773,7 @@ namespace { unsigned ExtraNotes = 0) { return Diag(Loc, DiagId, ExtraNotes, false); } - + OptionalDiagnostic FFDiag(const Expr *E, diag::kind DiagId = diag::note_invalid_subexpr_in_const_expr, unsigned ExtraNotes = 0) { @@ -1061,53 +1086,6 @@ void SubobjectDesignator::diagnosePointerArithmetic(EvalInfo &Info, setInvalid(); } -void SubobjectDesignator::adjustIndex(EvalInfo &Info, const Expr *E, APSInt N) { - if (Invalid || !N) return; - - uint64_t TruncatedN = N.extOrTrunc(64).getZExtValue(); - if (isMostDerivedAnUnsizedArray()) { - // If we're dealing with an array without constant bound, the expression is - // not a constant expression. - if (!Info.checkingPotentialConstantExpression()) - Info.CCEDiag(E, diag::note_constexpr_array_unknown_bound_arithmetic); - // Can't verify -- trust that the user is doing the right thing (or if - // not, trust that the caller will catch the bad behavior). - // FIXME: Should we reject if this overflows, at least? - Entries.back().ArrayIndex += TruncatedN; - return; - } - - // [expr.add]p4: For the purposes of these operators, a pointer to a - // nonarray object behaves the same as a pointer to the first element of - // an array of length one with the type of the object as its element type. - bool IsArray = MostDerivedPathLength == Entries.size() && - MostDerivedIsArrayElement; - uint64_t ArrayIndex = - IsArray ? Entries.back().ArrayIndex : (uint64_t)IsOnePastTheEnd; - uint64_t ArraySize = - IsArray ? 
getMostDerivedArraySize() : (uint64_t)1; - - if (N < -(int64_t)ArrayIndex || N > ArraySize - ArrayIndex) { - // Calculate the actual index in a wide enough type, so we can include - // it in the note. - N = N.extend(std::max(N.getBitWidth() + 1, 65)); - (llvm::APInt&)N += ArrayIndex; - assert(N.ugt(ArraySize) && "bounds check failed for in-bounds index"); - diagnosePointerArithmetic(Info, E, N); - setInvalid(); - return; - } - - ArrayIndex += TruncatedN; - assert(ArrayIndex <= ArraySize && - "bounds check succeeded for out-of-bounds index"); - - if (IsArray) - Entries.back().ArrayIndex = ArrayIndex; - else - IsOnePastTheEnd = (ArrayIndex != 0); -} - CallStackFrame::CallStackFrame(EvalInfo &Info, SourceLocation CallLoc, const FunctionDecl *Callee, const LValue *This, APValue *Arguments) @@ -1236,6 +1214,8 @@ namespace { IsNullPtr); else { assert(!InvalidBase && "APValues can't handle invalid LValue bases"); + assert(!Designator.FirstEntryIsAnUnsizedArray && + "Unsized array with a valid base?"); V = APValue(Base, Offset, Designator.Entries, Designator.IsOnePastTheEnd, CallIndex, IsNullPtr); } @@ -1300,9 +1280,12 @@ namespace { if (checkSubobject(Info, E, isa(D) ? CSK_Field : CSK_Base)) Designator.addDeclUnchecked(D, Virtual); } - void addUnsizedArray(EvalInfo &Info, const Expr *E, QualType ElemTy) { - if (checkSubobject(Info, E, CSK_ArrayToPointer)) - Designator.addUnsizedArrayUnchecked(ElemTy); + void addUnsizedArray(EvalInfo &Info, QualType ElemTy) { + assert(Designator.Entries.empty() && getType(Base)->isPointerType()); + assert(isBaseAnAllocSizeCall(Base) && + "Only alloc_size bases can have unsized arrays"); + Designator.FirstEntryIsAnUnsizedArray = true; + Designator.addUnsizedArrayUnchecked(ElemTy); } void addArray(EvalInfo &Info, const Expr *E, const ConstantArrayType *CAT) { if (checkSubobject(Info, E, CSK_ArrayToPointer)) @@ -3033,15 +3016,6 @@ static CompleteObject findCompleteObject(EvalInfo &Info, const Expr *E, if (!evaluateVarDeclInit(Info, E, VD, Frame, BaseVal)) return CompleteObject(); - - // The complete object can be an array of unknown bound, in which case we - // have to find the most recent declaration and adjust the type accordingly. - if (Info.Ctx.getAsIncompleteArrayType(BaseType)) { - QualType MostRecentType = - cast(D->getMostRecentDecl())->getType(); - if (Info.Ctx.getAsConstantArrayType(MostRecentType)) - BaseType = MostRecentType; - } } else { const Expr *Base = LVal.Base.dyn_cast(); @@ -4124,13 +4098,13 @@ static bool CheckConstexprFunction(EvalInfo &Info, SourceLocation CallLoc, if (Info.getLangOpts().CPlusPlus11) { const FunctionDecl *DiagDecl = Definition ? Definition : Declaration; - + // If this function is not constexpr because it is an inherited // non-constexpr constructor, diagnose that directly. auto *CD = dyn_cast(DiagDecl); if (CD && CD->isInheritingConstructor()) { auto *Inherited = CD->getInheritedConstructor().getConstructor(); - if (!Inherited->isConstexpr()) + if (!Inherited->isConstexpr()) DiagDecl = CD = Inherited; } @@ -4667,7 +4641,7 @@ public: return false; This = &ThisVal; Args = Args.slice(1); - } else if (MD && MD->isLambdaStaticInvoker()) { + } else if (MD && MD->isLambdaStaticInvoker()) { // Map the static invoker for the lambda back to the call operator. 
// Conveniently, we don't have to slice out the 'this' argument (as is // being done for the non-static case), since a static member function @@ -4702,7 +4676,7 @@ public: FD = LambdaCallOp; } - + } else return Error(E); @@ -5462,7 +5436,7 @@ static bool evaluateLValueAsAllocSize(EvalInfo &Info, APValue::LValueBase Base, Result.setInvalid(E); QualType Pointee = E->getType()->castAs()->getPointeeType(); - Result.addUnsizedArray(Info, E, Pointee); + Result.addUnsizedArray(Info, Pointee); return true; } @@ -5541,7 +5515,7 @@ public: // Update 'Result' to refer to the data member/field of the closure object // that represents the '*this' capture. if (!HandleLValueMember(Info, E, Result, - Info.CurrentCall->LambdaThisCaptureField)) + Info.CurrentCall->LambdaThisCaptureField)) return false; // If we captured '*this' by reference, replace the field with its referent. if (Info.CurrentCall->LambdaThisCaptureField->getType() @@ -5682,18 +5656,12 @@ bool PointerExprEvaluator::VisitCastExpr(const CastExpr* E) { Info, Result, SubExpr)) return false; } - // The result is a pointer to the first element of the array. if (const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType(SubExpr->getType())) Result.addArray(Info, E, CAT); - // If the array hasn't been given a bound yet, add it as an unsized one. - else { - auto AT = Info.Ctx.getAsArrayType(SubExpr->getType()); - assert(AT && "Array to pointer decay on non-array object?"); - Result.addUnsizedArray(Info, E, AT->getElementType()); - } - + else + Result.Designator.setInvalid(); return true; case CK_FunctionToPointerDecay: @@ -5761,7 +5729,7 @@ bool PointerExprEvaluator::visitNonBuiltinCallExpr(const CallExpr *E) { Result.setInvalid(E); QualType PointeeTy = E->getType()->castAs()->getPointeeType(); - Result.addUnsizedArray(Info, E, PointeeTy); + Result.addUnsizedArray(Info, PointeeTy); return true; } @@ -6395,7 +6363,7 @@ bool RecordExprEvaluator::VisitLambdaExpr(const LambdaExpr *E) { if (ClosureClass->isInvalidDecl()) return false; if (Info.checkingPotentialConstantExpression()) return true; - + const size_t NumFields = std::distance(ClosureClass->field_begin(), ClosureClass->field_end()); @@ -6414,7 +6382,7 @@ bool RecordExprEvaluator::VisitLambdaExpr(const LambdaExpr *E) { assert(CaptureInitIt != E->capture_init_end()); // Get the initializer for this field Expr *const CurFieldInit = *CaptureInitIt++; - + // If there is no initializer, either this is a VLA or an error has // occurred. if (!CurFieldInit) @@ -6615,18 +6583,18 @@ VectorExprEvaluator::VisitInitListExpr(const InitListExpr *E) { // The number of initializers can be less than the number of // vector elements. For OpenCL, this can be due to nested vector - // initialization. For GCC compatibility, missing trailing elements + // initialization. For GCC compatibility, missing trailing elements // should be initialized with zeroes. unsigned CountInits = 0, CountElts = 0; while (CountElts < NumElements) { // Handle nested vector initialization. 
- if (CountInits < NumInits + if (CountInits < NumInits && E->getInit(CountInits)->getType()->isVectorType()) { APValue v; if (!EvaluateVector(E->getInit(CountInits), v, Info)) return Error(E); unsigned vlen = v.getVectorLength(); - for (unsigned j = 0; j < vlen; j++) + for (unsigned j = 0; j < vlen; j++) Elements.push_back(v.getVectorElt(j)); CountElts += vlen; } else if (EltTy->isIntegerType()) { @@ -6902,7 +6870,7 @@ public: } bool Success(const llvm::APInt &I, const Expr *E, APValue &Result) { - assert(E->getType()->isIntegralOrEnumerationType() && + assert(E->getType()->isIntegralOrEnumerationType() && "Invalid evaluation result."); assert(I.getBitWidth() == Info.Ctx.getIntWidth(E->getType()) && "Invalid evaluation result."); @@ -6916,7 +6884,7 @@ public: } bool Success(uint64_t Value, const Expr *E, APValue &Result) { - assert(E->getType()->isIntegralOrEnumerationType() && + assert(E->getType()->isIntegralOrEnumerationType() && "Invalid evaluation result."); Result = APValue(Info.Ctx.MakeIntValue(Value, E->getType())); return true; @@ -6992,7 +6960,7 @@ public: } return Success(Info.ArrayInitIndex, E); } - + // Note, GNU defines __null as an integer, not a pointer. bool VisitGNUNullExpr(const GNUNullExpr *E) { return ZeroInitialization(E); @@ -7356,8 +7324,10 @@ static bool isDesignatorAtObjectEnd(const ASTContext &Ctx, const LValue &LVal) { unsigned I = 0; QualType BaseType = getType(Base); - // If this is an alloc_size base, we should ignore the initial array index - if (isBaseAnAllocSizeCall(Base)) { + if (LVal.Designator.FirstEntryIsAnUnsizedArray) { + assert(isBaseAnAllocSizeCall(Base) && + "Unsized array in non-alloc_size call?"); + // If this is an alloc_size base, we should ignore the initial array index ++I; BaseType = BaseType->castAs()->getPointeeType(); } @@ -8144,12 +8114,12 @@ bool DataRecursiveIntBinOpEvaluator:: Result = RHSResult.Val; return true; } - + if (E->isLogicalOp()) { bool lhsResult, rhsResult; bool LHSIsOK = HandleConversionToBool(LHSResult.Val, lhsResult); bool RHSIsOK = HandleConversionToBool(RHSResult.Val, rhsResult); - + if (LHSIsOK) { if (RHSIsOK) { if (E->getOpcode() == BO_LOr) @@ -8165,26 +8135,26 @@ bool DataRecursiveIntBinOpEvaluator:: return Success(rhsResult, E, Result); } } - + return false; } - + assert(E->getLHS()->getType()->isIntegralOrEnumerationType() && E->getRHS()->getType()->isIntegralOrEnumerationType()); - + if (LHSResult.Failed || RHSResult.Failed) return false; - + const APValue &LHSVal = LHSResult.Val; const APValue &RHSVal = RHSResult.Val; - + // Handle cases like (unsigned long)&a + 4. if (E->isAdditiveOp() && LHSVal.isLValue() && RHSVal.isInt()) { Result = LHSVal; addOrSubLValueAsInteger(Result, RHSVal.getInt(), E->getOpcode() == BO_Sub); return true; } - + // Handle cases like 4 + (unsigned long)&a if (E->getOpcode() == BO_Add && RHSVal.isLValue() && LHSVal.isInt()) { @@ -8192,7 +8162,7 @@ bool DataRecursiveIntBinOpEvaluator:: addOrSubLValueAsInteger(Result, LHSVal.getInt(), /*IsSub*/false); return true; } - + if (E->getOpcode() == BO_Sub && LHSVal.isLValue() && RHSVal.isLValue()) { // Handle (intptr_t)&&A - (intptr_t)&&B. 
if (!LHSVal.getLValueOffset().isZero() || @@ -8231,7 +8201,7 @@ bool DataRecursiveIntBinOpEvaluator:: void DataRecursiveIntBinOpEvaluator::process(EvalResult &Result) { Job &job = Queue.back(); - + switch (job.Kind) { case Job::AnyExprKind: { if (const BinaryOperator *Bop = dyn_cast(job.E)) { @@ -8241,12 +8211,12 @@ void DataRecursiveIntBinOpEvaluator::process(EvalResult &Result) { return; } } - + EvaluateExpr(job.E, Result); Queue.pop_back(); return; } - + case Job::BinOpKind: { const BinaryOperator *Bop = cast(job.E); bool SuppressRHSDiags = false; @@ -8261,7 +8231,7 @@ void DataRecursiveIntBinOpEvaluator::process(EvalResult &Result) { enqueue(Bop->getRHS()); return; } - + case Job::BinOpVisitedLHSKind: { const BinaryOperator *Bop = cast(job.E); EvalResult RHS; @@ -8271,7 +8241,7 @@ void DataRecursiveIntBinOpEvaluator::process(EvalResult &Result) { return; } } - + llvm_unreachable("Invalid Job::Kind!"); } @@ -8783,7 +8753,7 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { const RecordType *BaseRT = CurrentType->getAs(); if (!BaseRT) return Error(OOE); - + // Add the offset to the base. Result += RL.getBaseClassOffset(cast(BaseRT->getDecl())); break; @@ -9978,7 +9948,7 @@ static bool FastEvaluateAsRValue(const Expr *Exp, Expr::EvalResult &Result, IsConst = false; return true; } - + // FIXME: Evaluating values of large array and record types can cause // performance problems. Only do so in C++11 for now. if (Exp->isRValue() && (Exp->getType()->isArrayType() || @@ -10000,7 +9970,7 @@ bool Expr::EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx) const { bool IsConst; if (FastEvaluateAsRValue(this, Result, Ctx, IsConst, false)) return IsConst; - + EvalInfo Info(Ctx, Result, EvalInfo::EM_IgnoreSideEffects); return ::EvaluateAsRValue(Info, this, Result.Val); } diff --git a/lib/AST/ODRHash.cpp b/lib/AST/ODRHash.cpp index d72eebbe8e48f..83168d0924f6b 100644 --- a/lib/AST/ODRHash.cpp +++ b/lib/AST/ODRHash.cpp @@ -169,6 +169,11 @@ public: Inherited::VisitValueDecl(D); } + void VisitParmVarDecl(const ParmVarDecl *D) { + // TODO: Handle default arguments. 
+ Inherited::VisitParmVarDecl(D); + } + void VisitAccessSpecDecl(const AccessSpecDecl *D) { ID.AddInteger(D->getAccess()); Inherited::VisitAccessSpecDecl(D); @@ -202,6 +207,12 @@ public: Hash.AddBoolean(D->isPure()); Hash.AddBoolean(D->isDeletedAsWritten()); + ID.AddInteger(D->param_size()); + + for (auto *Param : D->parameters()) { + Hash.AddSubDecl(Param); + } + Inherited::VisitFunctionDecl(D); } @@ -256,6 +267,11 @@ void ODRHash::AddSubDecl(const Decl *D) { void ODRHash::AddCXXRecordDecl(const CXXRecordDecl *Record) { assert(Record && Record->hasDefinition() && "Expected non-null record to be a definition."); + + if (isa(Record)) { + return; + } + AddDecl(Record); // Filter out sub-Decls which will not be processed in order to get an @@ -315,6 +331,14 @@ public: } } + void AddQualType(QualType T) { + Hash.AddQualType(T); + } + + void VisitQualifiers(Qualifiers Quals) { + ID.AddInteger(Quals.getAsOpaqueValue()); + } + void Visit(const Type *T) { ID.AddInteger(T->getTypeClass()); Inherited::Visit(T); @@ -322,11 +346,69 @@ public: void VisitType(const Type *T) {} + void VisitAdjustedType(const AdjustedType *T) { + AddQualType(T->getOriginalType()); + AddQualType(T->getAdjustedType()); + VisitType(T); + } + + void VisitDecayedType(const DecayedType *T) { + AddQualType(T->getDecayedType()); + AddQualType(T->getPointeeType()); + VisitAdjustedType(T); + } + + void VisitArrayType(const ArrayType *T) { + AddQualType(T->getElementType()); + ID.AddInteger(T->getSizeModifier()); + VisitQualifiers(T->getIndexTypeQualifiers()); + VisitType(T); + } + void VisitConstantArrayType(const ConstantArrayType *T) { + T->getSize().Profile(ID); + VisitArrayType(T); + } + + void VisitDependentSizedArrayType(const DependentSizedArrayType *T) { + AddStmt(T->getSizeExpr()); + VisitArrayType(T); + } + + void VisitIncompleteArrayType(const IncompleteArrayType *T) { + VisitArrayType(T); + } + + void VisitVariableArrayType(const VariableArrayType *T) { + AddStmt(T->getSizeExpr()); + VisitArrayType(T); + } + void VisitBuiltinType(const BuiltinType *T) { ID.AddInteger(T->getKind()); VisitType(T); } + void VisitFunctionType(const FunctionType *T) { + AddQualType(T->getReturnType()); + T->getExtInfo().Profile(ID); + Hash.AddBoolean(T->isConst()); + Hash.AddBoolean(T->isVolatile()); + Hash.AddBoolean(T->isRestrict()); + VisitType(T); + } + + void VisitFunctionNoProtoType(const FunctionNoProtoType *T) { + VisitFunctionType(T); + } + + void VisitFunctionProtoType(const FunctionProtoType *T) { + ID.AddInteger(T->getNumParams()); + for (auto ParamType : T->getParamTypes()) + AddQualType(ParamType); + + VisitFunctionType(T); + } + void VisitTypedefType(const TypedefType *T) { AddDecl(T->getDecl()); Hash.AddQualType(T->getDecl()->getUnderlyingType()); diff --git a/lib/Basic/Diagnostic.cpp b/lib/Basic/Diagnostic.cpp index 350d5477751c6..6bdef78c074f7 100644 --- a/lib/Basic/Diagnostic.cpp +++ b/lib/Basic/Diagnostic.cpp @@ -67,18 +67,12 @@ DiagnosticsEngine::DiagnosticsEngine(IntrusiveRefCntPtr diags, ArgToStringCookie = nullptr; AllExtensionsSilenced = 0; - IgnoreAllWarnings = false; - WarningsAsErrors = false; - EnableAllWarnings = false; - ErrorsAsFatal = false; - FatalsAsError = false; - SuppressSystemWarnings = false; + SuppressAfterFatalError = true; SuppressAllDiagnostics = false; ElideType = true; PrintTemplateTree = false; ShowColors = false; ShowOverloads = Ovl_All; - ExtBehavior = diag::Severity::Ignored; ErrorLimit = 0; TemplateBacktraceLimit = 0; @@ -343,8 +337,8 @@ bool 
DiagnosticsEngine::setDiagnosticGroupErrorAsFatal(StringRef Group, return setSeverityForGroup(diag::Flavor::WarningOrError, Group, diag::Severity::Fatal); - // Otherwise, we want to set the diagnostic mapping's "no Werror" bit, and - // potentially downgrade anything already mapped to be an error. + // Otherwise, we want to set the diagnostic mapping's "no Wfatal-errors" bit, + // and potentially downgrade anything already mapped to be a fatal error. // Get the diagnostics in this group. SmallVector GroupDiags; diff --git a/lib/Basic/DiagnosticIDs.cpp b/lib/Basic/DiagnosticIDs.cpp index e0580af45b506..2852b40026c2b 100644 --- a/lib/Basic/DiagnosticIDs.cpp +++ b/lib/Basic/DiagnosticIDs.cpp @@ -420,7 +420,7 @@ DiagnosticIDs::getDiagnosticSeverity(unsigned DiagID, SourceLocation Loc, Result = Mapping.getSeverity(); // Upgrade ignored diagnostics if -Weverything is enabled. - if (Diag.EnableAllWarnings && Result == diag::Severity::Ignored && + if (State->EnableAllWarnings && Result == diag::Severity::Ignored && !Mapping.isUser() && getBuiltinDiagClass(DiagID) != CLASS_REMARK) Result = diag::Severity::Warning; @@ -435,7 +435,7 @@ DiagnosticIDs::getDiagnosticSeverity(unsigned DiagID, SourceLocation Loc, // For extension diagnostics that haven't been explicitly mapped, check if we // should upgrade the diagnostic. if (IsExtensionDiag && !Mapping.isUser()) - Result = std::max(Result, Diag.ExtBehavior); + Result = std::max(Result, State->ExtBehavior); // At this point, ignored errors can no longer be upgraded. if (Result == diag::Severity::Ignored) @@ -443,28 +443,24 @@ DiagnosticIDs::getDiagnosticSeverity(unsigned DiagID, SourceLocation Loc, // Honor -w, which is lower in priority than pedantic-errors, but higher than // -Werror. - if (Result == diag::Severity::Warning && Diag.IgnoreAllWarnings) + // FIXME: Under GCC, this also suppresses warnings that have been mapped to + // errors by -W flags and #pragma diagnostic. + if (Result == diag::Severity::Warning && State->IgnoreAllWarnings) return diag::Severity::Ignored; // If -Werror is enabled, map warnings to errors unless explicitly disabled. if (Result == diag::Severity::Warning) { - if (Diag.WarningsAsErrors && !Mapping.hasNoWarningAsError()) + if (State->WarningsAsErrors && !Mapping.hasNoWarningAsError()) Result = diag::Severity::Error; } // If -Wfatal-errors is enabled, map errors to fatal unless explicity // disabled. if (Result == diag::Severity::Error) { - if (Diag.ErrorsAsFatal && !Mapping.hasNoErrorAsFatal()) + if (State->ErrorsAsFatal && !Mapping.hasNoErrorAsFatal()) Result = diag::Severity::Fatal; } - // If explicitly requested, map fatal errors to errors. - if (Result == diag::Severity::Fatal) { - if (Diag.FatalsAsError) - Result = diag::Severity::Error; - } - // Custom diagnostics always are emitted in system headers. bool ShowInSystemHeader = !GetDiagInfo(DiagID) || GetDiagInfo(DiagID)->WarnShowInSystemHeader; @@ -472,7 +468,7 @@ DiagnosticIDs::getDiagnosticSeverity(unsigned DiagID, SourceLocation Loc, // If we are in a system header, we ignore it. We look at the diagnostic class // because we also want to ignore extensions and warnings in -Werror and // -pedantic-errors modes, which *map* warnings/extensions to errors. 
- if (Diag.SuppressSystemWarnings && !ShowInSystemHeader && Loc.isValid() && + if (State->SuppressSystemWarnings && !ShowInSystemHeader && Loc.isValid() && Diag.getSourceManager().isInSystemHeader( Diag.getSourceManager().getExpansionLoc(Loc))) return diag::Severity::Ignored; @@ -632,7 +628,7 @@ bool DiagnosticIDs::ProcessDiag(DiagnosticsEngine &Diag) const { // If a fatal error has already been emitted, silence all subsequent // diagnostics. - if (Diag.FatalErrorOccurred) { + if (Diag.FatalErrorOccurred && Diag.SuppressAfterFatalError) { if (DiagLevel >= DiagnosticIDs::Error && Diag.Client->IncludeInDiagnosticCounts()) { ++Diag.NumErrors; diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp index a5c43fba6d051..c677d9887accc 100644 --- a/lib/CodeGen/CGCall.cpp +++ b/lib/CodeGen/CGCall.cpp @@ -1756,9 +1756,7 @@ void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) { ConstructDefaultFnAttrList(F.getName(), F.hasFnAttribute(llvm::Attribute::OptimizeNone), /* AttrOnCallsite = */ false, FuncAttrs); - llvm::AttributeList AS = llvm::AttributeList::get( - getLLVMContext(), llvm::AttributeList::FunctionIndex, FuncAttrs); - F.addAttributes(llvm::AttributeList::FunctionIndex, AS); + F.addAttributes(llvm::AttributeList::FunctionIndex, FuncAttrs); } void CodeGenModule::ConstructAttributeList( diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp index a64303831171d..70b741651fd11 100644 --- a/lib/CodeGen/CGExprScalar.cpp +++ b/lib/CodeGen/CGExprScalar.cpp @@ -51,6 +51,64 @@ struct BinOpInfo { BinaryOperator::Opcode Opcode; // Opcode of BinOp to perform FPOptions FPFeatures; const Expr *E; // Entire expr, for error unsupported. May not be binop. + + /// Check if the binop can result in integer overflow. + bool mayHaveIntegerOverflow() const { + // Without constant input, we can't rule out overflow. + const auto *LHSCI = dyn_cast(LHS); + const auto *RHSCI = dyn_cast(RHS); + if (!LHSCI || !RHSCI) + return true; + + // Assume overflow is possible, unless we can prove otherwise. + bool Overflow = true; + const auto &LHSAP = LHSCI->getValue(); + const auto &RHSAP = RHSCI->getValue(); + if (Opcode == BO_Add) { + if (Ty->hasSignedIntegerRepresentation()) + (void)LHSAP.sadd_ov(RHSAP, Overflow); + else + (void)LHSAP.uadd_ov(RHSAP, Overflow); + } else if (Opcode == BO_Sub) { + if (Ty->hasSignedIntegerRepresentation()) + (void)LHSAP.ssub_ov(RHSAP, Overflow); + else + (void)LHSAP.usub_ov(RHSAP, Overflow); + } else if (Opcode == BO_Mul) { + if (Ty->hasSignedIntegerRepresentation()) + (void)LHSAP.smul_ov(RHSAP, Overflow); + else + (void)LHSAP.umul_ov(RHSAP, Overflow); + } else if (Opcode == BO_Div || Opcode == BO_Rem) { + if (Ty->hasSignedIntegerRepresentation() && !RHSCI->isZero()) + (void)LHSAP.sdiv_ov(RHSAP, Overflow); + else + return false; + } + return Overflow; + } + + /// Check if the binop computes a division or a remainder. + bool isDivisionLikeOperation() const { + return Opcode == BO_Div || Opcode == BO_Rem || Opcode == BO_DivAssign || + Opcode == BO_RemAssign; + } + + /// Check if the binop can result in an integer division by zero. + bool mayHaveIntegerDivisionByZero() const { + if (isDivisionLikeOperation()) + if (auto *CI = dyn_cast(RHS)) + return CI->isZero(); + return true; + } + + /// Check if the binop can result in a float division by zero. 
+ bool mayHaveFloatDivisionByZero() const { + if (isDivisionLikeOperation()) + if (auto *CFP = dyn_cast(RHS)) + return CFP->isZero(); + return true; + } }; static bool MustVisitNullValue(const Expr *E) { @@ -85,9 +143,17 @@ static bool CanElideOverflowCheck(const ASTContext &Ctx, const BinOpInfo &Op) { assert((isa(Op.E) || isa(Op.E)) && "Expected a unary or binary operator"); + // If the binop has constant inputs and we can prove there is no overflow, + // we can elide the overflow check. + if (!Op.mayHaveIntegerOverflow()) + return true; + + // If a unary op has a widened operand, the op cannot overflow. if (const auto *UO = dyn_cast(Op.E)) return IsWidenedIntegerOp(Ctx, UO->getSubExpr()); + // We usually don't need overflow checks for binops with widened operands. + // Multiplication with promoted unsigned operands is a special case. const auto *BO = cast(Op.E); auto OptionalLHSTy = getUnwidenedIntegerType(Ctx, BO->getLHS()); if (!OptionalLHSTy) @@ -100,14 +166,14 @@ static bool CanElideOverflowCheck(const ASTContext &Ctx, const BinOpInfo &Op) { QualType LHSTy = *OptionalLHSTy; QualType RHSTy = *OptionalRHSTy; - // We usually don't need overflow checks for binary operations with widened - // operands. Multiplication with promoted unsigned operands is a special case. + // This is the simple case: binops without unsigned multiplication, and with + // widened operands. No overflow check is needed here. if ((Op.Opcode != BO_Mul && Op.Opcode != BO_MulAssign) || !LHSTy->isUnsignedIntegerType() || !RHSTy->isUnsignedIntegerType()) return true; - // The overflow check can be skipped if either one of the unpromoted types - // are less than half the size of the promoted type. + // For unsigned multiplication the overflow check can be elided if either one + // of the unpromoted types are less than half the size of the promoted type. unsigned PromotedSize = Ctx.getTypeSize(Op.E->getType()); return (2 * Ctx.getTypeSize(LHSTy)) < PromotedSize || (2 * Ctx.getTypeSize(RHSTy)) < PromotedSize; @@ -2377,7 +2443,8 @@ void ScalarExprEmitter::EmitUndefinedBehaviorIntegerDivAndRemCheck( const auto *BO = cast(Ops.E); if (CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow) && Ops.Ty->hasSignedIntegerRepresentation() && - !IsWidenedIntegerOp(CGF.getContext(), BO->getLHS())) { + !IsWidenedIntegerOp(CGF.getContext(), BO->getLHS()) && + Ops.mayHaveIntegerOverflow()) { llvm::IntegerType *Ty = cast(Zero->getType()); llvm::Value *IntMin = @@ -2400,11 +2467,13 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) { CodeGenFunction::SanitizerScope SanScope(&CGF); if ((CGF.SanOpts.has(SanitizerKind::IntegerDivideByZero) || CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) && - Ops.Ty->isIntegerType()) { + Ops.Ty->isIntegerType() && + (Ops.mayHaveIntegerDivisionByZero() || Ops.mayHaveIntegerOverflow())) { llvm::Value *Zero = llvm::Constant::getNullValue(ConvertType(Ops.Ty)); EmitUndefinedBehaviorIntegerDivAndRemCheck(Ops, Zero, true); } else if (CGF.SanOpts.has(SanitizerKind::FloatDivideByZero) && - Ops.Ty->isRealFloatingType()) { + Ops.Ty->isRealFloatingType() && + Ops.mayHaveFloatDivisionByZero()) { llvm::Value *Zero = llvm::Constant::getNullValue(ConvertType(Ops.Ty)); llvm::Value *NonZero = Builder.CreateFCmpUNE(Ops.RHS, Zero); EmitBinOpCheck(std::make_pair(NonZero, SanitizerKind::FloatDivideByZero), @@ -2439,7 +2508,8 @@ Value *ScalarExprEmitter::EmitRem(const BinOpInfo &Ops) { // Rem in C can't be a floating point type: C99 6.5.5p2. 
if ((CGF.SanOpts.has(SanitizerKind::IntegerDivideByZero) || CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) && - Ops.Ty->isIntegerType()) { + Ops.Ty->isIntegerType() && + (Ops.mayHaveIntegerDivisionByZero() || Ops.mayHaveIntegerOverflow())) { CodeGenFunction::SanitizerScope SanScope(&CGF); llvm::Value *Zero = llvm::Constant::getNullValue(ConvertType(Ops.Ty)); EmitUndefinedBehaviorIntegerDivAndRemCheck(Ops, Zero, false); diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp index 9f6ccb4b5d266..821629c50d4a1 100644 --- a/lib/CodeGen/CGObjCGNU.cpp +++ b/lib/CodeGen/CGObjCGNU.cpp @@ -663,7 +663,7 @@ class CGObjCGNUstep : public CGObjCGNU { } // The lookup function is guaranteed not to capture the receiver pointer. - LookupFn->setDoesNotCapture(1); + LookupFn->addParamAttr(0, llvm::Attribute::NoCapture); llvm::Value *args[] = { EnforceType(Builder, ReceiverPtr.getPointer(), PtrToIdTy), diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index 10f1673214546..ff26d80fe2b68 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -892,10 +892,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining) B.addAttribute(llvm::Attribute::NoInline); - F->addAttributes( - llvm::AttributeList::FunctionIndex, - llvm::AttributeList::get(F->getContext(), - llvm::AttributeList::FunctionIndex, B)); + F->addAttributes(llvm::AttributeList::FunctionIndex, B); return; } @@ -961,9 +958,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, B.addAttribute(llvm::Attribute::MinSize); } - F->addAttributes(llvm::AttributeList::FunctionIndex, - llvm::AttributeList::get( - F->getContext(), llvm::AttributeList::FunctionIndex, B)); + F->addAttributes(llvm::AttributeList::FunctionIndex, B); unsigned alignment = D->getMaxAlignment() / Context.getCharWidth(); if (alignment) @@ -2029,9 +2024,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk); if (ExtraAttrs.hasAttributes(llvm::AttributeList::FunctionIndex)) { llvm::AttrBuilder B(ExtraAttrs, llvm::AttributeList::FunctionIndex); - F->addAttributes(llvm::AttributeList::FunctionIndex, - llvm::AttributeList::get( - VMContext, llvm::AttributeList::FunctionIndex, B)); + F->addAttributes(llvm::AttributeList::FunctionIndex, B); } if (!DontDefer) { diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index 94c3880ea26ec..ecd81d84b1fa2 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -1901,10 +1901,7 @@ void X86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D, // Now add the 'alignstack' attribute with a value of 16. llvm::AttrBuilder B; B.addStackAlignmentAttr(16); - Fn->addAttributes( - llvm::AttributeList::FunctionIndex, - llvm::AttributeList::get(CGM.getLLVMContext(), - llvm::AttributeList::FunctionIndex, B)); + Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); } if (FD->hasAttr()) { llvm::Function *Fn = cast(GV); @@ -5449,10 +5446,7 @@ public: // the backend to perform a realignment as part of the function prologue. 
llvm::AttrBuilder B; B.addStackAlignmentAttr(8); - Fn->addAttributes( - llvm::AttributeList::FunctionIndex, - llvm::AttributeList::get(CGM.getLLVMContext(), - llvm::AttributeList::FunctionIndex, B)); + Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); } }; diff --git a/lib/Driver/SanitizerArgs.cpp b/lib/Driver/SanitizerArgs.cpp index c9561367a3a87..4dd4929c91489 100644 --- a/lib/Driver/SanitizerArgs.cpp +++ b/lib/Driver/SanitizerArgs.cpp @@ -511,7 +511,6 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, << "-fsanitize-coverage=edge"; // Basic block tracing and 8-bit counters require some type of coverage // enabled. - int CoverageTypes = CoverageFunc | CoverageBB | CoverageEdge; if (CoverageFeatures & CoverageTraceBB) D.Diag(clang::diag::warn_drv_deprecated_arg) << "-fsanitize-coverage=trace-bb" @@ -520,9 +519,18 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, D.Diag(clang::diag::warn_drv_deprecated_arg) << "-fsanitize-coverage=8bit-counters" << "-fsanitize-coverage=trace-pc-guard"; + + int InsertionPointTypes = CoverageFunc | CoverageBB | CoverageEdge; + if ((CoverageFeatures & InsertionPointTypes) && + !(CoverageFeatures &(CoverageTracePC | CoverageTracePCGuard))) { + D.Diag(clang::diag::warn_drv_deprecated_arg) + << "-fsanitize-coverage=[func|bb|edge]" + << "-fsanitize-coverage=[func|bb|edge],[trace-pc-guard|trace-pc]"; + } + // trace-pc w/o func/bb/edge implies edge. if ((CoverageFeatures & (CoverageTracePC | CoverageTracePCGuard)) && - !(CoverageFeatures & CoverageTypes)) + !(CoverageFeatures & InsertionPointTypes)) CoverageFeatures |= CoverageEdge; if (AllAddedKinds & Address) { diff --git a/lib/Parse/ParseOpenMP.cpp b/lib/Parse/ParseOpenMP.cpp index dfb0438ba8ce9..86ac035f3c8c9 100644 --- a/lib/Parse/ParseOpenMP.cpp +++ b/lib/Parse/ParseOpenMP.cpp @@ -1677,6 +1677,30 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind, ConsumeToken(); ConsumeToken(); + Data.MapType = + IsMapClauseModifierToken(Tok) + ? static_cast( + getOpenMPSimpleClauseType(Kind, PP.getSpelling(Tok))) + : OMPC_MAP_unknown; + if (Data.MapType == OMPC_MAP_unknown || + Data.MapType == OMPC_MAP_always) + Diag(Tok, diag::err_omp_unknown_map_type); + ConsumeToken(); + } else { + Data.MapType = OMPC_MAP_tofrom; + Data.IsMapTypeImplicit = true; + } + } else if (IsMapClauseModifierToken(PP.LookAhead(0))) { + if (PP.LookAhead(1).is(tok::colon)) { + Data.MapTypeModifier = Data.MapType; + if (Data.MapTypeModifier != OMPC_MAP_always) { + Diag(Tok, diag::err_omp_unknown_map_type_modifier); + Data.MapTypeModifier = OMPC_MAP_unknown; + } else + MapTypeModifierSpecified = true; + + ConsumeToken(); + Data.MapType = IsMapClauseModifierToken(Tok) ? 
static_cast( diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index b3ba86e0685bb..a206100b89eba 100644 --- a/lib/Sema/SemaChecking.cpp +++ b/lib/Sema/SemaChecking.cpp @@ -759,7 +759,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, break; case Builtin::BI__builtin_stdarg_start: case Builtin::BI__builtin_va_start: - if (SemaBuiltinVAStart(TheCall)) + if (SemaBuiltinVAStart(BuiltinID, TheCall)) return ExprError(); break; case Builtin::BI__va_start: { @@ -770,7 +770,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, return ExprError(); break; default: - if (SemaBuiltinVAStart(TheCall)) + if (SemaBuiltinVAStart(BuiltinID, TheCall)) return ExprError(); break; } @@ -2090,7 +2090,7 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return SemaBuiltinCpuSupports(*this, TheCall); if (BuiltinID == X86::BI__builtin_ms_va_start) - return SemaBuiltinMSVAStart(TheCall); + return SemaBuiltinVAStart(BuiltinID, TheCall); // If the intrinsic has rounding or SAE make sure its valid. if (CheckX86BuiltinRoundingOrSAE(BuiltinID, TheCall)) @@ -3611,11 +3611,81 @@ ExprResult Sema::CheckOSLogFormatStringArg(Expr *Arg) { return Result; } +/// Check that the user is calling the appropriate va_start builtin for the +/// target and calling convention. +static bool checkVAStartABI(Sema &S, unsigned BuiltinID, Expr *Fn) { + const llvm::Triple &TT = S.Context.getTargetInfo().getTriple(); + bool IsX64 = TT.getArch() == llvm::Triple::x86_64; + bool IsWindows = TT.isOSWindows(); + bool IsMSVAStart = BuiltinID == X86::BI__builtin_ms_va_start; + if (IsX64) { + clang::CallingConv CC = CC_C; + if (const FunctionDecl *FD = S.getCurFunctionDecl()) + CC = FD->getType()->getAs()->getCallConv(); + if (IsMSVAStart) { + // Don't allow this in System V ABI functions. + if (CC == CC_X86_64SysV || (!IsWindows && CC != CC_X86_64Win64)) + return S.Diag(Fn->getLocStart(), + diag::err_ms_va_start_used_in_sysv_function); + } else { + // On x86-64 Unix, don't allow this in Win64 ABI functions. + // On x64 Windows, don't allow this in System V ABI functions. + // (Yes, that means there's no corresponding way to support variadic + // System V ABI functions on Windows.) + if ((IsWindows && CC == CC_X86_64SysV) || + (!IsWindows && CC == CC_X86_64Win64)) + return S.Diag(Fn->getLocStart(), + diag::err_va_start_used_in_wrong_abi_function) + << !IsWindows; + } + return false; + } + + if (IsMSVAStart) + return S.Diag(Fn->getLocStart(), diag::err_x86_builtin_64_only); + return false; +} + +static bool checkVAStartIsInVariadicFunction(Sema &S, Expr *Fn, + ParmVarDecl **LastParam = nullptr) { + // Determine whether the current function, block, or obj-c method is variadic + // and get its parameter list. + bool IsVariadic = false; + ArrayRef Params; + if (BlockScopeInfo *CurBlock = S.getCurBlock()) { + IsVariadic = CurBlock->TheDecl->isVariadic(); + Params = CurBlock->TheDecl->parameters(); + } else if (FunctionDecl *FD = S.getCurFunctionDecl()) { + IsVariadic = FD->isVariadic(); + Params = FD->parameters(); + } else if (ObjCMethodDecl *MD = S.getCurMethodDecl()) { + IsVariadic = MD->isVariadic(); + // FIXME: This isn't correct for methods (results in bogus warning). + Params = MD->parameters(); + } else { + llvm_unreachable("unknown va_start context"); + } + + if (!IsVariadic) { + S.Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); + return true; + } + + if (LastParam) + *LastParam = Params.empty() ? 
nullptr : Params.back(); + + return false; +} + /// Check the arguments to '__builtin_va_start' or '__builtin_ms_va_start' /// for validity. Emit an error and return true on failure; return false /// on success. -bool Sema::SemaBuiltinVAStartImpl(CallExpr *TheCall) { +bool Sema::SemaBuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall) { Expr *Fn = TheCall->getCallee(); + + if (checkVAStartABI(*this, BuiltinID, Fn)) + return true; + if (TheCall->getNumArgs() > 2) { Diag(TheCall->getArg(2)->getLocStart(), diag::err_typecheck_call_too_many_args) @@ -3636,20 +3706,10 @@ bool Sema::SemaBuiltinVAStartImpl(CallExpr *TheCall) { if (checkBuiltinArgument(*this, TheCall, 0)) return true; - // Determine whether the current function is variadic or not. - BlockScopeInfo *CurBlock = getCurBlock(); - bool isVariadic; - if (CurBlock) - isVariadic = CurBlock->TheDecl->isVariadic(); - else if (FunctionDecl *FD = getCurFunctionDecl()) - isVariadic = FD->isVariadic(); - else - isVariadic = getCurMethodDecl()->isVariadic(); - - if (!isVariadic) { - Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); + // Check that the current function is variadic, and get its last parameter. + ParmVarDecl *LastParam; + if (checkVAStartIsInVariadicFunction(*this, Fn, &LastParam)) return true; - } // Verify that the second argument to the builtin is the last argument of the // current function or method. @@ -3664,16 +3724,7 @@ bool Sema::SemaBuiltinVAStartImpl(CallExpr *TheCall) { if (const DeclRefExpr *DR = dyn_cast(Arg)) { if (const ParmVarDecl *PV = dyn_cast(DR->getDecl())) { - // FIXME: This isn't correct for methods (results in bogus warning). - // Get the last formal in the current function. - const ParmVarDecl *LastArg; - if (CurBlock) - LastArg = CurBlock->TheDecl->parameters().back(); - else if (FunctionDecl *FD = getCurFunctionDecl()) - LastArg = FD->parameters().back(); - else - LastArg = getCurMethodDecl()->parameters().back(); - SecondArgIsLastNamedArgument = PV == LastArg; + SecondArgIsLastNamedArgument = PV == LastParam; Type = PV->getType(); ParamLoc = PV->getLocation(); @@ -3708,48 +3759,6 @@ bool Sema::SemaBuiltinVAStartImpl(CallExpr *TheCall) { return false; } -/// Check the arguments to '__builtin_va_start' for validity, and that -/// it was called from a function of the native ABI. -/// Emit an error and return true on failure; return false on success. -bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { - // On x86-64 Unix, don't allow this in Win64 ABI functions. - // On x64 Windows, don't allow this in System V ABI functions. - // (Yes, that means there's no corresponding way to support variadic - // System V ABI functions on Windows.) - if (Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86_64) { - unsigned OS = Context.getTargetInfo().getTriple().getOS(); - clang::CallingConv CC = CC_C; - if (const FunctionDecl *FD = getCurFunctionDecl()) - CC = FD->getType()->getAs()->getCallConv(); - if ((OS == llvm::Triple::Win32 && CC == CC_X86_64SysV) || - (OS != llvm::Triple::Win32 && CC == CC_X86_64Win64)) - return Diag(TheCall->getCallee()->getLocStart(), - diag::err_va_start_used_in_wrong_abi_function) - << (OS != llvm::Triple::Win32); - } - return SemaBuiltinVAStartImpl(TheCall); -} - -/// Check the arguments to '__builtin_ms_va_start' for validity, and that -/// it was called from a Win64 ABI function. -/// Emit an error and return true on failure; return false on success. -bool Sema::SemaBuiltinMSVAStart(CallExpr *TheCall) { - // This only makes sense for x86-64. 
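// Editor's illustrative sketch (not part of the patch): the decision table that
// the new checkVAStartABI() helper above implements, reduced to plain booleans.
// The names here (classifyVAStart, isWindowsOS, ccIsSysV, ccIsWin64, the two
// enums) are hypothetical stand-ins for the triple and calling-convention
// queries used in the real code, which reports Sema diagnostics instead of
// returning a verdict.
enum class VAStartKind { Ordinary, MS };
enum class VAStartVerdict { Ok, MSInSysVFunction, WrongABIFunction, MSOn32Bit };

static VAStartVerdict classifyVAStart(bool isX86_64, bool isWindowsOS,
                                      bool ccIsSysV, bool ccIsWin64,
                                      VAStartKind Kind) {
  if (!isX86_64)
    return Kind == VAStartKind::MS ? VAStartVerdict::MSOn32Bit
                                   : VAStartVerdict::Ok;
  if (Kind == VAStartKind::MS) {
    // __builtin_ms_va_start is rejected inside System V ABI functions.
    if (ccIsSysV || (!isWindowsOS && !ccIsWin64))
      return VAStartVerdict::MSInSysVFunction;
    return VAStartVerdict::Ok;
  }
  // __builtin_va_start is rejected in functions using the "other" ABI:
  // Win64-ABI functions on non-Windows, SysV-ABI functions on Windows.
  if ((isWindowsOS && ccIsSysV) || (!isWindowsOS && ccIsWin64))
    return VAStartVerdict::WrongABIFunction;
  return VAStartVerdict::Ok;
}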
- const llvm::Triple &TT = Context.getTargetInfo().getTriple(); - Expr *Callee = TheCall->getCallee(); - if (TT.getArch() != llvm::Triple::x86_64) - return Diag(Callee->getLocStart(), diag::err_x86_builtin_32_bit_tgt); - // Don't allow this in System V ABI functions. - clang::CallingConv CC = CC_C; - if (const FunctionDecl *FD = getCurFunctionDecl()) - CC = FD->getType()->getAs()->getCallConv(); - if (CC == CC_X86_64SysV || - (TT.getOS() != llvm::Triple::Win32 && CC != CC_X86_64Win64)) - return Diag(Callee->getLocStart(), - diag::err_ms_va_start_used_in_sysv_function); - return SemaBuiltinVAStartImpl(TheCall); -} - bool Sema::SemaBuiltinVAStartARM(CallExpr *Call) { // void __va_start(va_list *ap, const char *named_addr, size_t slot_size, // const char *named_addr); @@ -3761,26 +3770,14 @@ bool Sema::SemaBuiltinVAStartARM(CallExpr *Call) { diag::err_typecheck_call_too_few_args_at_least) << 0 /*function call*/ << 3 << Call->getNumArgs(); - // Determine whether the current function is variadic or not. - bool IsVariadic; - if (BlockScopeInfo *CurBlock = getCurBlock()) - IsVariadic = CurBlock->TheDecl->isVariadic(); - else if (FunctionDecl *FD = getCurFunctionDecl()) - IsVariadic = FD->isVariadic(); - else if (ObjCMethodDecl *MD = getCurMethodDecl()) - IsVariadic = MD->isVariadic(); - else - llvm_unreachable("unexpected statement type"); - - if (!IsVariadic) { - Diag(Func->getLocStart(), diag::err_va_start_used_in_non_variadic_function); - return true; - } - // Type-check the first argument normally. if (checkBuiltinArgument(*this, Call, 0)) return true; + // Check that the current function is variadic. + if (checkVAStartIsInVariadicFunction(*this, Func)) + return true; + const struct { unsigned ArgNo; QualType Type; diff --git a/lib/Sema/SemaDecl.cpp b/lib/Sema/SemaDecl.cpp index 054ccb64cbec2..d4c0783638d11 100644 --- a/lib/Sema/SemaDecl.cpp +++ b/lib/Sema/SemaDecl.cpp @@ -2951,7 +2951,8 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD, // Merge ns_returns_retained attribute. if (OldTypeInfo.getProducesResult() != NewTypeInfo.getProducesResult()) { if (NewTypeInfo.getProducesResult()) { - Diag(New->getLocation(), diag::err_returns_retained_mismatch); + Diag(New->getLocation(), diag::err_function_attribute_mismatch) + << "'ns_returns_retained'"; Diag(OldLocation, diag::note_previous_declaration); return true; } diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp index f7307f35568de..d63151ef67599 100644 --- a/lib/Sema/SemaExpr.cpp +++ b/lib/Sema/SemaExpr.cpp @@ -15372,7 +15372,7 @@ static ExprResult diagnoseUnknownAnyExpr(Sema &S, Expr *E) { } /// Check for operands with placeholder types and complain if found. -/// Returns true if there was an error and no recovery was possible. +/// Returns ExprError() if there was an error and no recovery was possible. ExprResult Sema::CheckPlaceholderExpr(Expr *E) { if (!getLangOpts().CPlusPlus) { // C cannot handle TypoExpr nodes on either side of a binop because it diff --git a/lib/Sema/SemaExprCXX.cpp b/lib/Sema/SemaExprCXX.cpp index d65570fcef766..9b88cddbc9698 100644 --- a/lib/Sema/SemaExprCXX.cpp +++ b/lib/Sema/SemaExprCXX.cpp @@ -901,17 +901,36 @@ static QualType adjustCVQualifiersForCXXThisWithinLambda( // capturing lamdbda's call operator. // - // The issue is that we cannot rely entirely on the FunctionScopeInfo stack - // since ScopeInfos are pushed on during parsing and treetransforming. 
But - // since a generic lambda's call operator can be instantiated anywhere (even - // end of the TU) we need to be able to examine its enclosing lambdas and so - // we use the DeclContext to get a hold of the closure-class and query it for - // capture information. The reason we don't just resort to always using the - // DeclContext chain is that it is only mature for lambda expressions - // enclosing generic lambda's call operators that are being instantiated. - + // Since the FunctionScopeInfo stack is representative of the lexical + // nesting of the lambda expressions during initial parsing (and is the best + // place for querying information about captures about lambdas that are + // partially processed) and perhaps during instantiation of function templates + // that contain lambda expressions that need to be transformed BUT not + // necessarily during instantiation of a nested generic lambda's function call + // operator (which might even be instantiated at the end of the TU) - at which + // time the DeclContext tree is mature enough to query capture information + // reliably - we use a two pronged approach to walk through all the lexically + // enclosing lambda expressions: + // + // 1) Climb down the FunctionScopeInfo stack as long as each item represents + // a Lambda (i.e. LambdaScopeInfo) AND each LSI's 'closure-type' is lexically + // enclosed by the call-operator of the LSI below it on the stack (while + // tracking the enclosing DC for step 2 if needed). Note the topmost LSI on + // the stack represents the innermost lambda. + // + // 2) If we run out of enclosing LSI's, check if the enclosing DeclContext + // represents a lambda's call operator. If it does, we must be instantiating + // a generic lambda's call operator (represented by the Current LSI, and + // should be the only scenario where an inconsistency between the LSI and the + // DeclContext should occur), so climb out the DeclContexts if they + // represent lambdas, while querying the corresponding closure types + // regarding capture information. + + // 1) Climb down the function scope info stack. for (int I = FunctionScopes.size(); - I-- && isa(FunctionScopes[I]); + I-- && isa(FunctionScopes[I]) && + (!CurLSI || !CurLSI->Lambda || CurLSI->Lambda->getDeclContext() == + cast(FunctionScopes[I])->CallOperator); CurDC = getLambdaAwareParentOfDeclContext(CurDC)) { CurLSI = cast(FunctionScopes[I]); @@ -927,11 +946,17 @@ static QualType adjustCVQualifiersForCXXThisWithinLambda( return ASTCtx.getPointerType(ClassType); } } - // We've run out of ScopeInfos but check if CurDC is a lambda (which can - // happen during instantiation of generic lambdas) + + // 2) We've run out of ScopeInfos but check if CurDC is a lambda (which can + // happen during instantiation of its nested generic lambda call operator) if (isLambdaCallOperator(CurDC)) { - assert(CurLSI); - assert(isGenericLambdaCallOperatorSpecialization(CurLSI->CallOperator)); + assert(CurLSI && "While computing 'this' capture-type for a generic " + "lambda, we must have a corresponding LambdaScopeInfo"); + assert(isGenericLambdaCallOperatorSpecialization(CurLSI->CallOperator) && + "While computing 'this' capture-type for a generic lambda, when we " + "run out of enclosing LSI's, yet the enclosing DC is a " + "lambda-call-operator we must be (i.e. 
Current LSI) in a generic " + "lambda call oeprator"); assert(CurDC == getLambdaAwareParentOfDeclContext(CurLSI->CallOperator)); auto IsThisCaptured = diff --git a/lib/Serialization/ASTReader.cpp b/lib/Serialization/ASTReader.cpp index 5312ad118d5bc..61b5a822c5520 100644 --- a/lib/Serialization/ASTReader.cpp +++ b/lib/Serialization/ASTReader.cpp @@ -5531,14 +5531,8 @@ void ASTReader::ReadPragmaDiagnosticMappings(DiagnosticsEngine &Diag) { "Invalid data, not enough diag/map pairs"); while (Size--) { unsigned DiagID = Record[Idx++]; - unsigned SeverityAndUpgradedFromWarning = Record[Idx++]; - bool WasUpgradedFromWarning = - DiagnosticMapping::deserializeUpgradedFromWarning( - SeverityAndUpgradedFromWarning); DiagnosticMapping NewMapping = - Diag.makeUserMapping(DiagnosticMapping::deserializeSeverity( - SeverityAndUpgradedFromWarning), - Loc); + DiagnosticMapping::deserialize(Record[Idx++]); if (!NewMapping.isPragma() && !IncludeNonPragmaStates) continue; @@ -5547,14 +5541,12 @@ void ASTReader::ReadPragmaDiagnosticMappings(DiagnosticsEngine &Diag) { // If this mapping was specified as a warning but the severity was // upgraded due to diagnostic settings, simulate the current diagnostic // settings (and use a warning). - if (WasUpgradedFromWarning && !Mapping.isErrorOrFatal()) { - Mapping = Diag.makeUserMapping(diag::Severity::Warning, Loc); - continue; + if (NewMapping.wasUpgradedFromWarning() && !Mapping.isErrorOrFatal()) { + NewMapping.setSeverity(diag::Severity::Warning); + NewMapping.setUpgradedFromWarning(false); } - // Use the deserialized mapping verbatim. Mapping = NewMapping; - Mapping.setUpgradedFromWarning(WasUpgradedFromWarning); } return NewState; }; @@ -5569,22 +5561,36 @@ void ASTReader::ReadPragmaDiagnosticMappings(DiagnosticsEngine &Diag) { DiagStates.push_back(FirstState); // Skip the initial diagnostic state from the serialized module. - assert(Record[0] == 0 && + assert(Record[1] == 0 && "Invalid data, unexpected backref in initial state"); - Idx = 2 + Record[1] * 2; + Idx = 3 + Record[2] * 2; assert(Idx < Record.size() && "Invalid data, not enough state change pairs in initial state"); + } else if (F.isModule()) { + // For an explicit module, preserve the flags from the module build + // command line (-w, -Weverything, -Werror, ...) along with any explicit + // -Wblah flags. + unsigned Flags = Record[Idx++]; + DiagState Initial; + Initial.SuppressSystemWarnings = Flags & 1; Flags >>= 1; + Initial.ErrorsAsFatal = Flags & 1; Flags >>= 1; + Initial.WarningsAsErrors = Flags & 1; Flags >>= 1; + Initial.EnableAllWarnings = Flags & 1; Flags >>= 1; + Initial.IgnoreAllWarnings = Flags & 1; Flags >>= 1; + Initial.ExtBehavior = (diag::Severity)Flags; + FirstState = ReadDiagState(Initial, SourceLocation(), true); + + // Set up the root buffer of the module to start with the initial + // diagnostic state of the module itself, to cover files that contain no + // explicit transitions (for which we did not serialize anything). + Diag.DiagStatesByLoc.Files[F.OriginalSourceFileID] + .StateTransitions.push_back({FirstState, 0}); } else { - FirstState = ReadDiagState( - F.isModule() ? DiagState() : *Diag.DiagStatesByLoc.CurDiagState, - SourceLocation(), F.isModule()); - - // For an explicit module, set up the root buffer of the module to start - // with the initial diagnostic state of the module itself, to cover files - // that contain no explicit transitions. 
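// Editor's illustrative sketch (not part of the patch): the bit layout used by
// the new per-module diagnostic-flags word, decoded here in ASTReader and
// produced by the matching EncodeDiagStateFlags change in ASTWriter later in
// this patch. The struct and function names below are hypothetical; the real
// code works on DiagnosticsEngine::DiagState and the serialized Record.
#include <initializer_list>

struct ModuleDiagFlags {
  bool IgnoreAllWarnings, EnableAllWarnings, WarningsAsErrors,
       ErrorsAsFatal, SuppressSystemWarnings;
  unsigned ExtBehavior; // diag::Severity value, kept in the high bits
};

static unsigned encodeFlags(const ModuleDiagFlags &F) {
  unsigned Result = F.ExtBehavior;
  // Pushed most-significant-first, so SuppressSystemWarnings lands in bit 0.
  for (bool B : {F.IgnoreAllWarnings, F.EnableAllWarnings, F.WarningsAsErrors,
                 F.ErrorsAsFatal, F.SuppressSystemWarnings})
    Result = (Result << 1) | unsigned(B);
  return Result;
}

static ModuleDiagFlags decodeFlags(unsigned Bits) {
  // Pops in the reverse of the order encodeFlags pushed.
  ModuleDiagFlags F{};
  F.SuppressSystemWarnings = Bits & 1; Bits >>= 1;
  F.ErrorsAsFatal          = Bits & 1; Bits >>= 1;
  F.WarningsAsErrors       = Bits & 1; Bits >>= 1;
  F.EnableAllWarnings      = Bits & 1; Bits >>= 1;
  F.IgnoreAllWarnings      = Bits & 1; Bits >>= 1;
  F.ExtBehavior             = Bits; // whatever remains is the severity
  return F;
}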
- if (F.isModule()) - Diag.DiagStatesByLoc.Files[F.OriginalSourceFileID] - .StateTransitions.push_back({FirstState, 0}); + // For prefix ASTs, start with whatever the user configured on the + // command line. + Idx++; // Skip flags. + FirstState = ReadDiagState(*Diag.DiagStatesByLoc.CurDiagState, + SourceLocation(), false); } // Read the state transitions. @@ -9316,6 +9322,9 @@ void ASTReader::diagnoseOdrViolations() { MethodVolatile, MethodConst, MethodInline, + MethodNumberParameters, + MethodParameterType, + MethodParameterName, }; // These lambdas have the common portions of the ODR diagnostics. This @@ -9346,6 +9355,12 @@ void ASTReader::diagnoseOdrViolations() { return Hash.CalculateHash(); }; + auto ComputeQualTypeODRHash = [&Hash](QualType Ty) { + Hash.clear(); + Hash.AddQualType(Ty); + return Hash.CalculateHash(); + }; + switch (FirstDiffType) { case Other: case EndOfClass: @@ -9640,6 +9655,76 @@ void ASTReader::diagnoseOdrViolations() { break; } + const unsigned FirstNumParameters = FirstMethod->param_size(); + const unsigned SecondNumParameters = SecondMethod->param_size(); + if (FirstNumParameters != SecondNumParameters) { + ODRDiagError(FirstMethod->getLocation(), + FirstMethod->getSourceRange(), MethodNumberParameters) + << FirstName << FirstNumParameters; + ODRDiagNote(SecondMethod->getLocation(), + SecondMethod->getSourceRange(), MethodNumberParameters) + << SecondName << SecondNumParameters; + Diagnosed = true; + break; + } + + // Need this status boolean to know when break out of the switch. + bool ParameterMismatch = false; + for (unsigned I = 0; I < FirstNumParameters; ++I) { + const ParmVarDecl *FirstParam = FirstMethod->getParamDecl(I); + const ParmVarDecl *SecondParam = SecondMethod->getParamDecl(I); + + QualType FirstParamType = FirstParam->getType(); + QualType SecondParamType = SecondParam->getType(); + if (FirstParamType != SecondParamType && + ComputeQualTypeODRHash(FirstParamType) != + ComputeQualTypeODRHash(SecondParamType)) { + if (const DecayedType *ParamDecayedType = + FirstParamType->getAs()) { + ODRDiagError(FirstMethod->getLocation(), + FirstMethod->getSourceRange(), MethodParameterType) + << FirstName << (I + 1) << FirstParamType << true + << ParamDecayedType->getOriginalType(); + } else { + ODRDiagError(FirstMethod->getLocation(), + FirstMethod->getSourceRange(), MethodParameterType) + << FirstName << (I + 1) << FirstParamType << false; + } + + if (const DecayedType *ParamDecayedType = + SecondParamType->getAs()) { + ODRDiagNote(SecondMethod->getLocation(), + SecondMethod->getSourceRange(), MethodParameterType) + << SecondName << (I + 1) << SecondParamType << true + << ParamDecayedType->getOriginalType(); + } else { + ODRDiagNote(SecondMethod->getLocation(), + SecondMethod->getSourceRange(), MethodParameterType) + << SecondName << (I + 1) << SecondParamType << false; + } + ParameterMismatch = true; + break; + } + + DeclarationName FirstParamName = FirstParam->getDeclName(); + DeclarationName SecondParamName = SecondParam->getDeclName(); + if (FirstParamName != SecondParamName) { + ODRDiagError(FirstMethod->getLocation(), + FirstMethod->getSourceRange(), MethodParameterName) + << FirstName << (I + 1) << FirstParamName; + ODRDiagNote(SecondMethod->getLocation(), + SecondMethod->getSourceRange(), MethodParameterName) + << SecondName << (I + 1) << SecondParamName; + ParameterMismatch = true; + break; + } + } + + if (ParameterMismatch) { + Diagnosed = true; + break; + } + break; } } diff --git a/lib/Serialization/ASTWriter.cpp 
b/lib/Serialization/ASTWriter.cpp index 80bf65666ecec..8e4b217a44cd1 100644 --- a/lib/Serialization/ASTWriter.cpp +++ b/lib/Serialization/ASTWriter.cpp @@ -2868,8 +2868,27 @@ void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag, unsigned CurrID = 0; RecordData Record; + auto EncodeDiagStateFlags = + [](const DiagnosticsEngine::DiagState *DS) -> unsigned { + unsigned Result = (unsigned)DS->ExtBehavior; + for (unsigned Val : + {(unsigned)DS->IgnoreAllWarnings, (unsigned)DS->EnableAllWarnings, + (unsigned)DS->WarningsAsErrors, (unsigned)DS->ErrorsAsFatal, + (unsigned)DS->SuppressSystemWarnings}) + Result = (Result << 1) | Val; + return Result; + }; + + unsigned Flags = EncodeDiagStateFlags(Diag.DiagStatesByLoc.FirstDiagState); + Record.push_back(Flags); + auto AddDiagState = [&](const DiagnosticsEngine::DiagState *State, bool IncludeNonPragmaStates) { + // Ensure that the diagnostic state wasn't modified since it was created. + // We will not correctly round-trip this information otherwise. + assert(Flags == EncodeDiagStateFlags(State) && + "diag state flags vary in single AST file"); + unsigned &DiagStateID = DiagStateIDMap[State]; Record.push_back(DiagStateID); @@ -2882,7 +2901,7 @@ void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag, for (const auto &I : *State) { if (I.second.isPragma() || IncludeNonPragmaStates) { Record.push_back(I.first); - Record.push_back(I.second.serializeBits()); + Record.push_back(I.second.serialize()); } } // Update the placeholder. diff --git a/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/lib/StaticAnalyzer/Checkers/MallocChecker.cpp index 9a7e83c14923c..851114004b963 100644 --- a/lib/StaticAnalyzer/Checkers/MallocChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/MallocChecker.cpp @@ -19,6 +19,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" @@ -1753,8 +1754,8 @@ void MallocChecker::ReportBadFree(CheckerContext &C, SVal ArgVal, if (ExplodedNode *N = C.generateErrorNode()) { if (!BT_BadFree[*CheckKind]) - BT_BadFree[*CheckKind].reset( - new BugType(CheckNames[*CheckKind], "Bad free", "Memory Error")); + BT_BadFree[*CheckKind].reset(new BugType( + CheckNames[*CheckKind], "Bad free", categories::MemoryError)); SmallString<100> buf; llvm::raw_svector_ostream os(buf); @@ -1798,8 +1799,8 @@ void MallocChecker::ReportFreeAlloca(CheckerContext &C, SVal ArgVal, if (ExplodedNode *N = C.generateErrorNode()) { if (!BT_FreeAlloca[*CheckKind]) - BT_FreeAlloca[*CheckKind].reset( - new BugType(CheckNames[*CheckKind], "Free alloca()", "Memory Error")); + BT_FreeAlloca[*CheckKind].reset(new BugType( + CheckNames[*CheckKind], "Free alloca()", categories::MemoryError)); auto R = llvm::make_unique( *BT_FreeAlloca[*CheckKind], @@ -1824,7 +1825,7 @@ void MallocChecker::ReportMismatchedDealloc(CheckerContext &C, if (!BT_MismatchedDealloc) BT_MismatchedDealloc.reset( new BugType(CheckNames[CK_MismatchedDeallocatorChecker], - "Bad deallocator", "Memory Error")); + "Bad deallocator", categories::MemoryError)); SmallString<100> buf; llvm::raw_svector_ostream os(buf); @@ -1884,8 +1885,8 @@ void MallocChecker::ReportOffsetFree(CheckerContext &C, SVal ArgVal, return; if (!BT_OffsetFree[*CheckKind]) - 
BT_OffsetFree[*CheckKind].reset( - new BugType(CheckNames[*CheckKind], "Offset free", "Memory Error")); + BT_OffsetFree[*CheckKind].reset(new BugType( + CheckNames[*CheckKind], "Offset free", categories::MemoryError)); SmallString<100> buf; llvm::raw_svector_ostream os(buf); @@ -1936,7 +1937,7 @@ void MallocChecker::ReportUseAfterFree(CheckerContext &C, SourceRange Range, if (ExplodedNode *N = C.generateErrorNode()) { if (!BT_UseFree[*CheckKind]) BT_UseFree[*CheckKind].reset(new BugType( - CheckNames[*CheckKind], "Use-after-free", "Memory Error")); + CheckNames[*CheckKind], "Use-after-free", categories::MemoryError)); auto R = llvm::make_unique(*BT_UseFree[*CheckKind], "Use of memory after it is freed", N); @@ -1962,8 +1963,8 @@ void MallocChecker::ReportDoubleFree(CheckerContext &C, SourceRange Range, if (ExplodedNode *N = C.generateErrorNode()) { if (!BT_DoubleFree[*CheckKind]) - BT_DoubleFree[*CheckKind].reset( - new BugType(CheckNames[*CheckKind], "Double free", "Memory Error")); + BT_DoubleFree[*CheckKind].reset(new BugType( + CheckNames[*CheckKind], "Double free", categories::MemoryError)); auto R = llvm::make_unique( *BT_DoubleFree[*CheckKind], @@ -1991,7 +1992,8 @@ void MallocChecker::ReportDoubleDelete(CheckerContext &C, SymbolRef Sym) const { if (ExplodedNode *N = C.generateErrorNode()) { if (!BT_DoubleDelete) BT_DoubleDelete.reset(new BugType(CheckNames[CK_NewDeleteChecker], - "Double delete", "Memory Error")); + "Double delete", + categories::MemoryError)); auto R = llvm::make_unique( *BT_DoubleDelete, "Attempt to delete released memory", N); @@ -2017,8 +2019,9 @@ void MallocChecker::ReportUseZeroAllocated(CheckerContext &C, if (ExplodedNode *N = C.generateErrorNode()) { if (!BT_UseZerroAllocated[*CheckKind]) - BT_UseZerroAllocated[*CheckKind].reset(new BugType( - CheckNames[*CheckKind], "Use of zero allocated", "Memory Error")); + BT_UseZerroAllocated[*CheckKind].reset( + new BugType(CheckNames[*CheckKind], "Use of zero allocated", + categories::MemoryError)); auto R = llvm::make_unique(*BT_UseZerroAllocated[*CheckKind], "Use of zero-allocated memory", N); @@ -2253,8 +2256,8 @@ void MallocChecker::reportLeak(SymbolRef Sym, ExplodedNode *N, assert(N); if (!BT_Leak[*CheckKind]) { - BT_Leak[*CheckKind].reset( - new BugType(CheckNames[*CheckKind], "Memory leak", "Memory Error")); + BT_Leak[*CheckKind].reset(new BugType(CheckNames[*CheckKind], "Memory leak", + categories::MemoryError)); // Leaks should not be reported if they are post-dominated by a sink: // (1) Sinks are higher importance bugs. 
// (2) NoReturnFunctionChecker uses sink nodes to represent paths ending diff --git a/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp b/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp index 21527d8c347a4..41999d2527639 100644 --- a/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp @@ -178,7 +178,7 @@ private: const MemRegion *Region, BugReporter &BR, const Stmt *ValueExpr = nullptr) const { if (!BT) - BT.reset(new BugType(this, "Nullability", "Memory error")); + BT.reset(new BugType(this, "Nullability", categories::MemoryError)); auto R = llvm::make_unique(*BT, Msg, N); if (Region) { diff --git a/lib/StaticAnalyzer/Checkers/ValistChecker.cpp b/lib/StaticAnalyzer/Checkers/ValistChecker.cpp index d12ba62580739..06c4ef71d80bc 100644 --- a/lib/StaticAnalyzer/Checkers/ValistChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/ValistChecker.cpp @@ -256,7 +256,7 @@ void ValistChecker::reportUninitializedAccess(const MemRegion *VAList, if (!BT_uninitaccess) BT_uninitaccess.reset(new BugType(CheckNames[CK_Uninitialized], "Uninitialized va_list", - "Memory Error")); + categories::MemoryError)); auto R = llvm::make_unique(*BT_uninitaccess, Msg, N); R->markInteresting(VAList); R->addVisitor(llvm::make_unique(VAList)); @@ -274,7 +274,8 @@ void ValistChecker::reportLeakedVALists(const RegionVector &LeakedVALists, for (auto Reg : LeakedVALists) { if (!BT_leakedvalist) { BT_leakedvalist.reset(new BugType(CheckNames[CK_Unterminated], - "Leaked va_list", "Memory Error")); + "Leaked va_list", + categories::MemoryError)); BT_leakedvalist->setSuppressOnSink(true); } diff --git a/lib/StaticAnalyzer/Core/CommonBugCategories.cpp b/lib/StaticAnalyzer/Core/CommonBugCategories.cpp index 3cb9323563b31..421dfa48c97bd 100644 --- a/lib/StaticAnalyzer/Core/CommonBugCategories.cpp +++ b/lib/StaticAnalyzer/Core/CommonBugCategories.cpp @@ -16,5 +16,6 @@ const char * const CoreFoundationObjectiveC = "Core Foundation/Objective-C"; const char * const LogicError = "Logic error"; const char * const MemoryCoreFoundationObjectiveC = "Memory (Core Foundation/Objective-C)"; +const char * const MemoryError = "Memory error"; const char * const UnixAPI = "Unix API"; }}} diff --git a/test/Analysis/MismatchedDeallocator-path-notes.cpp b/test/Analysis/MismatchedDeallocator-path-notes.cpp index 118f23bca114d..1354386fc8b7f 100644 --- a/test/Analysis/MismatchedDeallocator-path-notes.cpp +++ b/test/Analysis/MismatchedDeallocator-path-notes.cpp @@ -287,7 +287,7 @@ void test() { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionMemory allocated by 'new[]' should be deallocated by 'delete[]', not 'delete' -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeBad deallocator // CHECK-NEXT: check_nameunix.MismatchedDeallocator // CHECK-NEXT: diff --git a/test/Analysis/NewDelete-path-notes.cpp b/test/Analysis/NewDelete-path-notes.cpp index 115a4addcaa89..ac760ca60eb43 100644 --- a/test/Analysis/NewDelete-path-notes.cpp +++ b/test/Analysis/NewDelete-path-notes.cpp @@ -257,7 +257,7 @@ void test(Odd *odd) { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionAttempt to free released memory -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeDouble free // CHECK-NEXT: check_namecplusplus.NewDelete // CHECK-NEXT: @@ -475,7 +475,7 @@ void test(Odd *odd) { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionAttempt to free released memory -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory 
error // CHECK-NEXT: typeDouble free // CHECK-NEXT: check_namecplusplus.NewDelete // CHECK-NEXT: diff --git a/test/Analysis/diagnostics/report-issues-within-main-file.cpp b/test/Analysis/diagnostics/report-issues-within-main-file.cpp index 784fdba972df3..e1dccc8e32048 100644 --- a/test/Analysis/diagnostics/report-issues-within-main-file.cpp +++ b/test/Analysis/diagnostics/report-issues-within-main-file.cpp @@ -945,7 +945,7 @@ void callInMacroArg() { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionMemory allocated by 'new[]' should be deallocated by 'delete[]', not 'delete' (within a call to '~auto_ptr') -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeBad deallocator // CHECK-NEXT: check_nameunix.MismatchedDeallocator // CHECK-NEXT: diff --git a/test/Analysis/edges-new.mm b/test/Analysis/edges-new.mm index 217cd4aa467fe..47a125ab0901f 100644 --- a/test/Analysis/edges-new.mm +++ b/test/Analysis/edges-new.mm @@ -20042,7 +20042,7 @@ namespace rdar14960554 { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionPotential leak of memory pointed to by 'buf' -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeMemory leak // CHECK-NEXT: check_nameunix.Malloc // CHECK-NEXT: @@ -20284,7 +20284,7 @@ namespace rdar14960554 { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionMemory allocated by 'new[]' should be deallocated by 'delete[]', not 'delete' -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeBad deallocator // CHECK-NEXT: check_nameunix.MismatchedDeallocator // CHECK-NEXT: diff --git a/test/Analysis/malloc-plist.c b/test/Analysis/malloc-plist.c index 26aea1604517a..e2062e8582290 100644 --- a/test/Analysis/malloc-plist.c +++ b/test/Analysis/malloc-plist.c @@ -421,7 +421,7 @@ void testMyMalloc() { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionPotential leak of memory pointed to by 'p' -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeMemory leak // CHECK-NEXT: check_nameunix.Malloc // CHECK-NEXT: @@ -586,7 +586,7 @@ void testMyMalloc() { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionPotential leak of memory pointed to by 'A' -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeMemory leak // CHECK-NEXT: check_nameunix.Malloc // CHECK-NEXT: @@ -974,7 +974,7 @@ void testMyMalloc() { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionPotential leak of memory pointed to by 'buf' -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeMemory leak // CHECK-NEXT: check_nameunix.Malloc // CHECK-NEXT: @@ -1376,7 +1376,7 @@ void testMyMalloc() { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionPotential leak of memory pointed to by 'buf' -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeMemory leak // CHECK-NEXT: check_nameunix.Malloc // CHECK-NEXT: @@ -1962,7 +1962,7 @@ void testMyMalloc() { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionUse of memory after it is freed -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeUse-after-free // CHECK-NEXT: check_nameunix.Malloc // CHECK-NEXT: @@ -2524,7 +2524,7 @@ void testMyMalloc() { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionPotential leak of memory pointed to by 'buf' -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: 
typeMemory leak // CHECK-NEXT: check_nameunix.Malloc // CHECK-NEXT: @@ -2795,7 +2795,7 @@ void testMyMalloc() { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionPotential leak of memory pointed to by 'v' -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeMemory leak // CHECK-NEXT: check_nameunix.Malloc // CHECK-NEXT: @@ -3144,7 +3144,7 @@ void testMyMalloc() { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionUse of memory after it is freed -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeUse-after-free // CHECK-NEXT: check_nameunix.Malloc // CHECK-NEXT: @@ -3309,7 +3309,7 @@ void testMyMalloc() { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionPotential leak of memory pointed to by 'm' -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeMemory leak // CHECK-NEXT: check_nameunix.Malloc // CHECK-NEXT: @@ -3517,7 +3517,7 @@ void testMyMalloc() { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionPotential leak of memory pointed to by 'x' -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeMemory leak // CHECK-NEXT: check_nameunix.Malloc // CHECK-NEXT: @@ -3725,7 +3725,7 @@ void testMyMalloc() { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionPotential leak of memory pointed to by 'x' -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeMemory leak // CHECK-NEXT: check_nameunix.Malloc // CHECK-NEXT: @@ -4030,7 +4030,7 @@ void testMyMalloc() { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionPotential leak of memory pointed to by 'x' -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeMemory leak // CHECK-NEXT: check_nameunix.Malloc // CHECK-NEXT: @@ -4335,7 +4335,7 @@ void testMyMalloc() { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionPotential leak of memory pointed to by 'x' -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeMemory leak // CHECK-NEXT: check_nameunix.Malloc // CHECK-NEXT: @@ -4543,7 +4543,7 @@ void testMyMalloc() { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionPotential leak of memory pointed to by 'x' -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeMemory leak // CHECK-NEXT: check_nameunix.Malloc // CHECK-NEXT: @@ -4751,7 +4751,7 @@ void testMyMalloc() { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionPotential leak of memory pointed to by 'x' -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeMemory leak // CHECK-NEXT: check_nameunix.Malloc // CHECK-NEXT: @@ -4988,7 +4988,7 @@ void testMyMalloc() { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionPotential memory leak -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeMemory leak // CHECK-NEXT: check_nameunix.Malloc // CHECK-NEXT: @@ -5225,7 +5225,7 @@ void testMyMalloc() { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionPotential memory leak -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeMemory leak // CHECK-NEXT: check_nameunix.Malloc // CHECK-NEXT: @@ -5496,7 +5496,7 @@ void testMyMalloc() { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionPotential memory leak -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeMemory leak // 
CHECK-NEXT: check_nameunix.Malloc // CHECK-NEXT: diff --git a/test/Analysis/plist-macros.cpp b/test/Analysis/plist-macros.cpp index 594cfdc6efc58..18d3ce11e67ec 100644 --- a/test/Analysis/plist-macros.cpp +++ b/test/Analysis/plist-macros.cpp @@ -218,7 +218,7 @@ void test2(int *p) { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionMemory allocated by malloc() should be deallocated by free(), not 'delete' -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeBad deallocator // CHECK-NEXT: check_nameunix.MismatchedDeallocator // CHECK-NEXT: @@ -315,7 +315,7 @@ void test2(int *p) { // CHECK-NEXT: // CHECK-NEXT: // CHECK-NEXT: descriptionPotential leak of memory pointed to by 'x' -// CHECK-NEXT: categoryMemory Error +// CHECK-NEXT: categoryMemory error // CHECK-NEXT: typeMemory leak // CHECK-NEXT: check_nameunix.Malloc // CHECK-NEXT: diff --git a/test/CodeGen/PR32874.c b/test/CodeGen/PR32874.c new file mode 100644 index 0000000000000..f8aa1c2a66f4f --- /dev/null +++ b/test/CodeGen/PR32874.c @@ -0,0 +1,61 @@ +// RUN: %clang_cc1 -x c -S -emit-llvm -o - -triple x86_64-apple-darwin10 %s \ +// RUN: -w -fsanitize=signed-integer-overflow,unsigned-integer-overflow,integer-divide-by-zero,float-divide-by-zero \ +// RUN: | FileCheck %s + +// CHECK-LABEL: define void @foo +// CHECK-NOT: !nosanitize +void foo(const int *p) { + // __builtin_prefetch expects its optional arguments to be constant integers. + // Check that ubsan does not instrument any safe arithmetic performed in + // operands to __builtin_prefetch. (A clang frontend check should reject + // unsafe arithmetic in these operands.) + + __builtin_prefetch(p, 0 + 1, 0 + 3); + __builtin_prefetch(p, 1 - 0, 3 - 0); + __builtin_prefetch(p, 1 * 1, 1 * 3); + __builtin_prefetch(p, 1 / 1, 3 / 1); + __builtin_prefetch(p, 3 % 2, 3 % 1); + + __builtin_prefetch(p, 0U + 1U, 0U + 3U); + __builtin_prefetch(p, 1U - 0U, 3U - 0U); + __builtin_prefetch(p, 1U * 1U, 1U * 3U); + __builtin_prefetch(p, 1U / 1U, 3U / 1U); + __builtin_prefetch(p, 3U % 2U, 3U % 1U); +} + +// CHECK-LABEL: define void @ub_constant_arithmetic +void ub_constant_arithmetic() { + // Check that we still instrument unsafe arithmetic, even if it is known to + // be unsafe at compile time. 
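// Editor's note (illustrative, not part of the test): __builtin_prefetch
// requires its rw/locality arguments to be integer constant expressions, so
// UBSan must not wrap the folded arithmetic in those arguments with runtime
// overflow checks; doing so would leave the arguments non-constant and the
// call would be rejected, which appears to be the regression PR32874 guards
// against. A hypothetical reduced example of the protected pattern:
static void warm_cache(const int *p) {
  __builtin_prefetch(p, /*rw=*/0 + 1, /*locality=*/1 + 2); // stays uninstrumented
}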
+ + int INT_MIN = 0xffffffff; + int INT_MAX = 0x7fffffff; + + // CHECK: call void @__ubsan_handle_add_overflow + // CHECK: call void @__ubsan_handle_add_overflow + INT_MAX + 1; + INT_MAX + -1; + + // CHECK: call void @__ubsan_handle_negate_overflow + // CHECK: call void @__ubsan_handle_sub_overflow + -INT_MIN; + -INT_MAX - 2; + + // CHECK: call void @__ubsan_handle_mul_overflow + // CHECK: call void @__ubsan_handle_mul_overflow + INT_MAX * INT_MAX; + INT_MIN * INT_MIN; + + // CHECK: call void @__ubsan_handle_divrem_overflow + // CHECK: call void @__ubsan_handle_divrem_overflow + 1 / 0; + INT_MIN / -1; + + // CHECK: call void @__ubsan_handle_divrem_overflow + // CHECK: call void @__ubsan_handle_divrem_overflow + 1 % 0; + INT_MIN % -1; + + // CHECK: call void @__ubsan_handle_divrem_overflow + 1.0 / 0.0; +} diff --git a/test/CodeGen/aarch64-neon-2velem.c b/test/CodeGen/aarch64-neon-2velem.c index 2866990433dfc..2937d7e5b1509 100644 --- a/test/CodeGen/aarch64-neon-2velem.c +++ b/test/CodeGen/aarch64-neon-2velem.c @@ -497,7 +497,7 @@ float64_t test_vfmsd_laneq_f64(float64_t a, float64_t b, float64x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { @@ -508,7 +508,7 @@ int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { @@ -519,7 +519,7 @@ int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { @@ -530,7 +530,7 @@ int32x4_t test_vmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> 
@llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { @@ -542,7 +542,7 @@ int64x2_t test_vmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { @@ -554,7 +554,7 @@ int32x4_t test_vmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { @@ -566,7 +566,7 @@ int64x2_t test_vmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { @@ -578,7 +578,7 @@ int32x4_t test_vmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { @@ -589,7 +589,7 @@ int64x2_t test_vmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 +// 
CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { @@ -600,7 +600,7 @@ int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { @@ -611,7 +611,7 @@ int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { @@ -622,7 +622,7 @@ int32x4_t test_vmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { @@ -634,7 +634,7 @@ int64x2_t test_vmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { @@ -646,7 +646,7 @@ int32x4_t test_vmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> 
@llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { @@ -658,7 +658,7 @@ int64x2_t test_vmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { @@ -670,7 +670,7 @@ int32x4_t test_vmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { @@ -681,7 +681,7 @@ int64x2_t test_vmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_lane_u16(int32x4_t a, int16x4_t b, int16x4_t v) { @@ -692,7 +692,7 @@ int32x4_t test_vmlal_lane_u16(int32x4_t a, int16x4_t b, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) { @@ -703,7 +703,7 @@ int64x2_t test_vmlal_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> 
@llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_laneq_u16(int32x4_t a, int16x4_t b, int16x8_t v) { @@ -714,7 +714,7 @@ int32x4_t test_vmlal_laneq_u16(int32x4_t a, int16x4_t b, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_laneq_u32(int64x2_t a, int32x2_t b, int32x4_t v) { @@ -726,7 +726,7 @@ int64x2_t test_vmlal_laneq_u32(int64x2_t a, int32x2_t b, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_high_lane_u16(int32x4_t a, int16x8_t b, int16x4_t v) { @@ -738,7 +738,7 @@ int32x4_t test_vmlal_high_lane_u16(int32x4_t a, int16x8_t b, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_high_lane_u32(int64x2_t a, int32x4_t b, int32x2_t v) { @@ -750,7 +750,7 @@ int64x2_t test_vmlal_high_lane_u32(int64x2_t a, int32x4_t b, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_high_laneq_u16(int32x4_t a, int16x8_t b, int16x8_t v) { @@ -762,7 +762,7 @@ int32x4_t test_vmlal_high_laneq_u16(int32x4_t a, int16x8_t b, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] 
= call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_high_laneq_u32(int64x2_t a, int32x4_t b, int32x4_t v) { @@ -773,7 +773,7 @@ int64x2_t test_vmlal_high_laneq_u32(int64x2_t a, int32x4_t b, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_lane_u16(int32x4_t a, int16x4_t b, int16x4_t v) { @@ -784,7 +784,7 @@ int32x4_t test_vmlsl_lane_u16(int32x4_t a, int16x4_t b, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) { @@ -795,7 +795,7 @@ int64x2_t test_vmlsl_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_laneq_u16(int32x4_t a, int16x4_t b, int16x8_t v) { @@ -806,7 +806,7 @@ int32x4_t test_vmlsl_laneq_u16(int32x4_t a, int16x4_t b, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_laneq_u32(int64x2_t a, int32x2_t b, int32x4_t v) { @@ -818,7 +818,7 @@ int64x2_t test_vmlsl_laneq_u32(int64x2_t a, int32x2_t b, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> 
[[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_high_lane_u16(int32x4_t a, int16x8_t b, int16x4_t v) { @@ -830,7 +830,7 @@ int32x4_t test_vmlsl_high_lane_u16(int32x4_t a, int16x8_t b, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_high_lane_u32(int64x2_t a, int32x4_t b, int32x2_t v) { @@ -842,7 +842,7 @@ int64x2_t test_vmlsl_high_lane_u32(int64x2_t a, int32x4_t b, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_high_laneq_u16(int32x4_t a, int16x8_t b, int16x8_t v) { @@ -854,7 +854,7 @@ int32x4_t test_vmlsl_high_laneq_u16(int32x4_t a, int16x8_t b, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_high_laneq_u32(int64x2_t a, int32x4_t b, int32x4_t v) { @@ -865,7 +865,7 @@ int64x2_t test_vmlsl_high_laneq_u32(int64x2_t a, int32x4_t b, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: ret <4 x i32> [[VMULL2_I]] int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t v) { return vmull_lane_s16(a, v, 3); @@ -875,7 +875,7 @@ int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: ret <2 x i64> 
[[VMULL2_I]] int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t v) { return vmull_lane_s32(a, v, 1); @@ -885,7 +885,7 @@ int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: ret <4 x i32> [[VMULL2_I]] uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t v) { return vmull_lane_u16(a, v, 3); @@ -895,7 +895,7 @@ uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: ret <2 x i64> [[VMULL2_I]] uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t v) { return vmull_lane_u32(a, v, 1); @@ -906,7 +906,7 @@ uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: ret <4 x i32> [[VMULL2_I]] int32x4_t test_vmull_high_lane_s16(int16x8_t a, int16x4_t v) { return vmull_high_lane_s16(a, v, 3); @@ -917,7 +917,7 @@ int32x4_t test_vmull_high_lane_s16(int16x8_t a, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: ret <2 x i64> [[VMULL2_I]] int64x2_t test_vmull_high_lane_s32(int32x4_t a, int32x2_t v) { return vmull_high_lane_s32(a, v, 1); @@ -928,7 +928,7 @@ int64x2_t test_vmull_high_lane_s32(int32x4_t a, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: ret <4 x i32> [[VMULL2_I]] uint32x4_t test_vmull_high_lane_u16(uint16x8_t a, uint16x4_t v) { return vmull_high_lane_u16(a, v, 3); @@ -939,7 +939,7 @@ uint32x4_t test_vmull_high_lane_u16(uint16x8_t a, uint16x4_t v) { // CHECK: 
[[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: ret <2 x i64> [[VMULL2_I]] uint64x2_t test_vmull_high_lane_u32(uint32x4_t a, uint32x2_t v) { return vmull_high_lane_u32(a, v, 1); @@ -949,7 +949,7 @@ uint64x2_t test_vmull_high_lane_u32(uint32x4_t a, uint32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: ret <4 x i32> [[VMULL2_I]] int32x4_t test_vmull_laneq_s16(int16x4_t a, int16x8_t v) { return vmull_laneq_s16(a, v, 7); @@ -959,7 +959,7 @@ int32x4_t test_vmull_laneq_s16(int16x4_t a, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: ret <2 x i64> [[VMULL2_I]] int64x2_t test_vmull_laneq_s32(int32x2_t a, int32x4_t v) { return vmull_laneq_s32(a, v, 3); @@ -969,7 +969,7 @@ int64x2_t test_vmull_laneq_s32(int32x2_t a, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: ret <4 x i32> [[VMULL2_I]] uint32x4_t test_vmull_laneq_u16(uint16x4_t a, uint16x8_t v) { return vmull_laneq_u16(a, v, 7); @@ -979,7 +979,7 @@ uint32x4_t test_vmull_laneq_u16(uint16x4_t a, uint16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: ret <2 x i64> [[VMULL2_I]] uint64x2_t test_vmull_laneq_u32(uint32x2_t a, uint32x4_t v) { return vmull_laneq_u32(a, v, 3); @@ -990,7 +990,7 @@ uint64x2_t test_vmull_laneq_u32(uint32x2_t a, uint32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> 
@llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: ret <4 x i32> [[VMULL2_I]] int32x4_t test_vmull_high_laneq_s16(int16x8_t a, int16x8_t v) { return vmull_high_laneq_s16(a, v, 7); @@ -1001,7 +1001,7 @@ int32x4_t test_vmull_high_laneq_s16(int16x8_t a, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: ret <2 x i64> [[VMULL2_I]] int64x2_t test_vmull_high_laneq_s32(int32x4_t a, int32x4_t v) { return vmull_high_laneq_s32(a, v, 3); @@ -1012,7 +1012,7 @@ int64x2_t test_vmull_high_laneq_s32(int32x4_t a, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: ret <4 x i32> [[VMULL2_I]] uint32x4_t test_vmull_high_laneq_u16(uint16x8_t a, uint16x8_t v) { return vmull_high_laneq_u16(a, v, 7); @@ -1023,7 +1023,7 @@ uint32x4_t test_vmull_high_laneq_u16(uint16x8_t a, uint16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: ret <2 x i64> [[VMULL2_I]] uint64x2_t test_vmull_high_laneq_u32(uint32x4_t a, uint32x4_t v) { return vmull_high_laneq_u32(a, v, 3); @@ -1034,8 +1034,8 @@ uint64x2_t test_vmull_high_laneq_u32(uint32x4_t a, uint32x4_t v) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { return vqdmlal_lane_s16(a, b, v, 3); @@ -1046,8 +1046,8 @@ int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // 
CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { return vqdmlal_lane_s32(a, b, v, 1); @@ -1059,8 +1059,8 @@ int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { return vqdmlal_high_lane_s16(a, b, v, 3); @@ -1072,8 +1072,8 @@ int32x4_t test_vqdmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { return vqdmlal_high_lane_s32(a, b, v, 1); @@ -1084,8 +1084,8 @@ int64x2_t test_vqdmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { return vqdmlsl_lane_s16(a, b, v, 3); @@ -1096,8 +1096,8 @@ int32x4_t 
test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { return vqdmlsl_lane_s32(a, b, v, 1); @@ -1109,8 +1109,8 @@ int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { return vqdmlsl_high_lane_s16(a, b, v, 3); @@ -1122,8 +1122,8 @@ int32x4_t test_vqdmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { return vqdmlsl_high_lane_s32(a, b, v, 1); @@ -1133,7 +1133,7 @@ int64x2_t test_vqdmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULL_V2_I]] int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t v) { @@ -1144,7 +1144,7 @@ int32x4_t 
test_vqdmull_lane_s16(int16x4_t a, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQDMULL_V2_I]] int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t v) { @@ -1155,7 +1155,7 @@ int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULL_V2_I]] int32x4_t test_vqdmull_laneq_s16(int16x4_t a, int16x8_t v) { @@ -1166,7 +1166,7 @@ int32x4_t test_vqdmull_laneq_s16(int16x4_t a, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQDMULL_V2_I]] int64x2_t test_vqdmull_laneq_s32(int32x2_t a, int32x4_t v) { @@ -1178,7 +1178,7 @@ int64x2_t test_vqdmull_laneq_s32(int32x2_t a, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULL_V2_I]] int32x4_t test_vqdmull_high_lane_s16(int16x8_t a, int16x4_t v) { @@ -1190,7 +1190,7 @@ int32x4_t test_vqdmull_high_lane_s16(int16x8_t a, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQDMULL_V2_I]] int64x2_t 
test_vqdmull_high_lane_s32(int32x4_t a, int32x2_t v) { @@ -1202,7 +1202,7 @@ int64x2_t test_vqdmull_high_lane_s32(int32x4_t a, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULL_V2_I]] int32x4_t test_vqdmull_high_laneq_s16(int16x8_t a, int16x8_t v) { @@ -1214,7 +1214,7 @@ int32x4_t test_vqdmull_high_laneq_s16(int16x8_t a, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQDMULL_V2_I]] int64x2_t test_vqdmull_high_laneq_s32(int32x4_t a, int32x4_t v) { @@ -1225,7 +1225,7 @@ int64x2_t test_vqdmull_high_laneq_s32(int32x4_t a, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQDMULH_V2_I]] int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t v) { @@ -1236,7 +1236,7 @@ int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]] int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t v) { @@ -1247,7 +1247,7 @@ int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: 
[[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQDMULH_V2_I]] int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t v) { @@ -1258,7 +1258,7 @@ int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]] int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t v) { @@ -1269,7 +1269,7 @@ int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQRDMULH_V2_I]] int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t v) { @@ -1280,7 +1280,7 @@ int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]] int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t v) { @@ -1291,7 +1291,7 @@ int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQRDMULH_V2_I]] int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t v) { @@ -1302,7 +1302,7 @@ int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> 
@llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]] int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t v) { @@ -1390,7 +1390,7 @@ float64x2_t test_vmulq_laneq_f64(float64x2_t a, float64x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> [[SHUFFLE]]) #2 +// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> [[SHUFFLE]]) // CHECK: ret <2 x float> [[VMULX2_I]] float32x2_t test_vmulx_lane_f32(float32x2_t a, float32x2_t v) { return vmulx_lane_f32(a, v, 1); @@ -1400,7 +1400,7 @@ float32x2_t test_vmulx_lane_f32(float32x2_t a, float32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> [[SHUFFLE]]) #2 +// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> [[SHUFFLE]]) // CHECK: ret <4 x float> [[VMULX2_I]] float32x4_t test_vmulxq_lane_f32(float32x4_t a, float32x2_t v) { return vmulxq_lane_f32(a, v, 1); @@ -1410,7 +1410,7 @@ float32x4_t test_vmulxq_lane_f32(float32x4_t a, float32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x double> %v, <1 x double> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> [[SHUFFLE]]) #2 +// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> [[SHUFFLE]]) // CHECK: ret <2 x double> [[VMULX2_I]] float64x2_t test_vmulxq_lane_f64(float64x2_t a, float64x1_t v) { return vmulxq_lane_f64(a, v, 0); @@ -1420,7 +1420,7 @@ float64x2_t test_vmulxq_lane_f64(float64x2_t a, float64x1_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> [[SHUFFLE]]) #2 +// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> [[SHUFFLE]]) // CHECK: ret <2 x float> [[VMULX2_I]] float32x2_t test_vmulx_laneq_f32(float32x2_t a, float32x4_t v) { return vmulx_laneq_f32(a, v, 3); @@ -1430,7 +1430,7 @@ float32x2_t test_vmulx_laneq_f32(float32x2_t a, float32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> [[SHUFFLE]]) #2 +// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> [[SHUFFLE]]) // CHECK: 
ret <4 x float> [[VMULX2_I]] float32x4_t test_vmulxq_laneq_f32(float32x4_t a, float32x4_t v) { return vmulxq_laneq_f32(a, v, 3); @@ -1440,7 +1440,7 @@ float32x4_t test_vmulxq_laneq_f32(float32x4_t a, float32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x double> %v, <2 x double> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> [[SHUFFLE]]) #2 +// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> [[SHUFFLE]]) // CHECK: ret <2 x double> [[VMULX2_I]] float64x2_t test_vmulxq_laneq_f64(float64x2_t a, float64x2_t v) { return vmulxq_laneq_f64(a, v, 1); @@ -1867,7 +1867,7 @@ float64x2_t test_vfmsq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v) // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { @@ -1878,7 +1878,7 @@ int32x4_t test_vmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { @@ -1889,7 +1889,7 @@ int64x2_t test_vmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { @@ -1900,7 +1900,7 @@ int32x4_t test_vmlal_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // 
CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { @@ -1912,7 +1912,7 @@ int64x2_t test_vmlal_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { @@ -1924,7 +1924,7 @@ int32x4_t test_vmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { @@ -1936,7 +1936,7 @@ int64x2_t test_vmlal_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { @@ -1948,7 +1948,7 @@ int32x4_t test_vmlal_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { @@ -1959,7 +1959,7 @@ int64x2_t test_vmlal_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: 
[[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { @@ -1970,7 +1970,7 @@ int32x4_t test_vmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { @@ -1981,7 +1981,7 @@ int64x2_t test_vmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { @@ -1992,7 +1992,7 @@ int32x4_t test_vmlsl_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { @@ -2004,7 +2004,7 @@ int64x2_t test_vmlsl_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { @@ -2016,7 +2016,7 @@ int32x4_t test_vmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> 
[[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { @@ -2028,7 +2028,7 @@ int64x2_t test_vmlsl_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { @@ -2040,7 +2040,7 @@ int32x4_t test_vmlsl_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { @@ -2051,7 +2051,7 @@ int64x2_t test_vmlsl_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_lane_u16_0(int32x4_t a, int16x4_t b, int16x4_t v) { @@ -2062,7 +2062,7 @@ int32x4_t test_vmlal_lane_u16_0(int32x4_t a, int16x4_t b, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_lane_u32_0(int64x2_t a, int32x2_t b, int32x2_t v) { @@ -2073,7 +2073,7 @@ int64x2_t test_vmlal_lane_u32_0(int64x2_t a, int32x2_t b, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 
x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_laneq_u16_0(int32x4_t a, int16x4_t b, int16x8_t v) { @@ -2084,7 +2084,7 @@ int32x4_t test_vmlal_laneq_u16_0(int32x4_t a, int16x4_t b, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_laneq_u32_0(int64x2_t a, int32x2_t b, int32x4_t v) { @@ -2096,7 +2096,7 @@ int64x2_t test_vmlal_laneq_u32_0(int64x2_t a, int32x2_t b, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_high_lane_u16_0(int32x4_t a, int16x8_t b, int16x4_t v) { @@ -2108,7 +2108,7 @@ int32x4_t test_vmlal_high_lane_u16_0(int32x4_t a, int16x8_t b, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_high_lane_u32_0(int64x2_t a, int32x4_t b, int32x2_t v) { @@ -2120,7 +2120,7 @@ int64x2_t test_vmlal_high_lane_u32_0(int64x2_t a, int32x4_t b, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_high_laneq_u16_0(int32x4_t a, int16x8_t b, int16x8_t v) { @@ -2132,7 +2132,7 @@ int32x4_t test_vmlal_high_laneq_u16_0(int32x4_t a, int16x8_t b, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: 
[[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_high_laneq_u32_0(int64x2_t a, int32x4_t b, int32x4_t v) { @@ -2143,7 +2143,7 @@ int64x2_t test_vmlal_high_laneq_u32_0(int64x2_t a, int32x4_t b, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_lane_u16_0(int32x4_t a, int16x4_t b, int16x4_t v) { @@ -2154,7 +2154,7 @@ int32x4_t test_vmlsl_lane_u16_0(int32x4_t a, int16x4_t b, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_lane_u32_0(int64x2_t a, int32x2_t b, int32x2_t v) { @@ -2165,7 +2165,7 @@ int64x2_t test_vmlsl_lane_u32_0(int64x2_t a, int32x2_t b, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_laneq_u16_0(int32x4_t a, int16x4_t b, int16x8_t v) { @@ -2176,7 +2176,7 @@ int32x4_t test_vmlsl_laneq_u16_0(int32x4_t a, int16x4_t b, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_laneq_u32_0(int64x2_t a, int32x2_t b, int32x4_t v) { @@ -2188,7 +2188,7 @@ int64x2_t test_vmlsl_laneq_u32_0(int64x2_t a, int32x2_t b, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> 
zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_high_lane_u16_0(int32x4_t a, int16x8_t b, int16x4_t v) { @@ -2200,7 +2200,7 @@ int32x4_t test_vmlsl_high_lane_u16_0(int32x4_t a, int16x8_t b, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_high_lane_u32_0(int64x2_t a, int32x4_t b, int32x2_t v) { @@ -2212,7 +2212,7 @@ int64x2_t test_vmlsl_high_lane_u32_0(int64x2_t a, int32x4_t b, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_high_laneq_u16_0(int32x4_t a, int16x8_t b, int16x8_t v) { @@ -2224,7 +2224,7 @@ int32x4_t test_vmlsl_high_laneq_u16_0(int32x4_t a, int16x8_t b, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_high_laneq_u32_0(int64x2_t a, int32x4_t b, int32x4_t v) { @@ -2235,7 +2235,7 @@ int64x2_t test_vmlsl_high_laneq_u32_0(int64x2_t a, int32x4_t b, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: ret <4 x i32> [[VMULL2_I]] int32x4_t test_vmull_lane_s16_0(int16x4_t a, int16x4_t v) { return vmull_lane_s16(a, v, 0); @@ -2245,7 +2245,7 @@ int32x4_t 
test_vmull_lane_s16_0(int16x4_t a, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: ret <2 x i64> [[VMULL2_I]] int64x2_t test_vmull_lane_s32_0(int32x2_t a, int32x2_t v) { return vmull_lane_s32(a, v, 0); @@ -2255,7 +2255,7 @@ int64x2_t test_vmull_lane_s32_0(int32x2_t a, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: ret <4 x i32> [[VMULL2_I]] uint32x4_t test_vmull_lane_u16_0(uint16x4_t a, uint16x4_t v) { return vmull_lane_u16(a, v, 0); @@ -2265,7 +2265,7 @@ uint32x4_t test_vmull_lane_u16_0(uint16x4_t a, uint16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: ret <2 x i64> [[VMULL2_I]] uint64x2_t test_vmull_lane_u32_0(uint32x2_t a, uint32x2_t v) { return vmull_lane_u32(a, v, 0); @@ -2276,7 +2276,7 @@ uint64x2_t test_vmull_lane_u32_0(uint32x2_t a, uint32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: ret <4 x i32> [[VMULL2_I]] int32x4_t test_vmull_high_lane_s16_0(int16x8_t a, int16x4_t v) { return vmull_high_lane_s16(a, v, 0); @@ -2287,7 +2287,7 @@ int32x4_t test_vmull_high_lane_s16_0(int16x8_t a, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: ret <2 x i64> [[VMULL2_I]] int64x2_t test_vmull_high_lane_s32_0(int32x4_t a, int32x2_t v) { return vmull_high_lane_s32(a, v, 0); @@ -2298,7 +2298,7 @@ int64x2_t test_vmull_high_lane_s32_0(int32x4_t a, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> 
zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: ret <4 x i32> [[VMULL2_I]] uint32x4_t test_vmull_high_lane_u16_0(uint16x8_t a, uint16x4_t v) { return vmull_high_lane_u16(a, v, 0); @@ -2309,7 +2309,7 @@ uint32x4_t test_vmull_high_lane_u16_0(uint16x8_t a, uint16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: ret <2 x i64> [[VMULL2_I]] uint64x2_t test_vmull_high_lane_u32_0(uint32x4_t a, uint32x2_t v) { return vmull_high_lane_u32(a, v, 0); @@ -2319,7 +2319,7 @@ uint64x2_t test_vmull_high_lane_u32_0(uint32x4_t a, uint32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: ret <4 x i32> [[VMULL2_I]] int32x4_t test_vmull_laneq_s16_0(int16x4_t a, int16x8_t v) { return vmull_laneq_s16(a, v, 0); @@ -2329,7 +2329,7 @@ int32x4_t test_vmull_laneq_s16_0(int16x4_t a, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: ret <2 x i64> [[VMULL2_I]] int64x2_t test_vmull_laneq_s32_0(int32x2_t a, int32x4_t v) { return vmull_laneq_s32(a, v, 0); @@ -2339,7 +2339,7 @@ int64x2_t test_vmull_laneq_s32_0(int32x2_t a, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: ret <4 x i32> [[VMULL2_I]] uint32x4_t test_vmull_laneq_u16_0(uint16x4_t a, uint16x8_t v) { return vmull_laneq_u16(a, v, 0); @@ -2349,7 +2349,7 @@ uint32x4_t test_vmull_laneq_u16_0(uint16x4_t a, uint16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> 
[[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: ret <2 x i64> [[VMULL2_I]] uint64x2_t test_vmull_laneq_u32_0(uint32x2_t a, uint32x4_t v) { return vmull_laneq_u32(a, v, 0); @@ -2360,7 +2360,7 @@ uint64x2_t test_vmull_laneq_u32_0(uint32x2_t a, uint32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: ret <4 x i32> [[VMULL2_I]] int32x4_t test_vmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) { return vmull_high_laneq_s16(a, v, 0); @@ -2371,7 +2371,7 @@ int32x4_t test_vmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: ret <2 x i64> [[VMULL2_I]] int64x2_t test_vmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) { return vmull_high_laneq_s32(a, v, 0); @@ -2382,7 +2382,7 @@ int64x2_t test_vmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: ret <4 x i32> [[VMULL2_I]] uint32x4_t test_vmull_high_laneq_u16_0(uint16x8_t a, uint16x8_t v) { return vmull_high_laneq_u16(a, v, 0); @@ -2393,7 +2393,7 @@ uint32x4_t test_vmull_high_laneq_u16_0(uint16x8_t a, uint16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: ret <2 x i64> [[VMULL2_I]] uint64x2_t test_vmull_high_laneq_u32_0(uint32x4_t a, uint32x4_t v) { return vmull_high_laneq_u32(a, v, 0); @@ -2404,8 +2404,8 @@ uint64x2_t test_vmull_high_laneq_u32_0(uint32x4_t a, uint32x4_t v) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call 
<4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { return vqdmlal_lane_s16(a, b, v, 0); @@ -2416,8 +2416,8 @@ int32x4_t test_vqdmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { return vqdmlal_lane_s32(a, b, v, 0); @@ -2429,8 +2429,8 @@ int64x2_t test_vqdmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { return vqdmlal_high_lane_s16(a, b, v, 0); @@ -2442,8 +2442,8 @@ int32x4_t test_vqdmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { return vqdmlal_high_lane_s32(a, b, v, 0); @@ -2454,8 +2454,8 @@ int64x2_t test_vqdmlal_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { // CHECK: [[TMP0:%.*]] 
= bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { return vqdmlsl_lane_s16(a, b, v, 0); @@ -2466,8 +2466,8 @@ int32x4_t test_vqdmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { return vqdmlsl_lane_s32(a, b, v, 0); @@ -2479,8 +2479,8 @@ int64x2_t test_vqdmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { return vqdmlsl_high_lane_s16(a, b, v, 0); @@ -2492,8 +2492,8 @@ int32x4_t test_vqdmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_high_lane_s32_0(int64x2_t 
a, int32x4_t b, int32x2_t v) { return vqdmlsl_high_lane_s32(a, b, v, 0); @@ -2503,7 +2503,7 @@ int64x2_t test_vqdmlsl_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULL_V2_I]] int32x4_t test_vqdmull_lane_s16_0(int16x4_t a, int16x4_t v) { @@ -2514,7 +2514,7 @@ int32x4_t test_vqdmull_lane_s16_0(int16x4_t a, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQDMULL_V2_I]] int64x2_t test_vqdmull_lane_s32_0(int32x2_t a, int32x2_t v) { @@ -2525,7 +2525,7 @@ int64x2_t test_vqdmull_lane_s32_0(int32x2_t a, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULL_V2_I]] int32x4_t test_vqdmull_laneq_s16_0(int16x4_t a, int16x8_t v) { @@ -2536,7 +2536,7 @@ int32x4_t test_vqdmull_laneq_s16_0(int16x4_t a, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQDMULL_V2_I]] int64x2_t test_vqdmull_laneq_s32_0(int32x2_t a, int32x4_t v) { @@ -2548,7 +2548,7 @@ int64x2_t test_vqdmull_laneq_s32_0(int32x2_t a, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x 
i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULL_V2_I]] int32x4_t test_vqdmull_high_lane_s16_0(int16x8_t a, int16x4_t v) { @@ -2560,7 +2560,7 @@ int32x4_t test_vqdmull_high_lane_s16_0(int16x8_t a, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQDMULL_V2_I]] int64x2_t test_vqdmull_high_lane_s32_0(int32x4_t a, int32x2_t v) { @@ -2572,7 +2572,7 @@ int64x2_t test_vqdmull_high_lane_s32_0(int32x4_t a, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULL_V2_I]] int32x4_t test_vqdmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) { @@ -2584,7 +2584,7 @@ int32x4_t test_vqdmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQDMULL_V2_I]] int64x2_t test_vqdmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) { @@ -2595,7 +2595,7 @@ int64x2_t test_vqdmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQDMULH_V2_I]] int16x4_t test_vqdmulh_lane_s16_0(int16x4_t a, int16x4_t v) { @@ -2606,7 +2606,7 @@ int16x4_t test_vqdmulh_lane_s16_0(int16x4_t a, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = 
bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]] int16x8_t test_vqdmulhq_lane_s16_0(int16x8_t a, int16x4_t v) { @@ -2617,7 +2617,7 @@ int16x8_t test_vqdmulhq_lane_s16_0(int16x8_t a, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQDMULH_V2_I]] int32x2_t test_vqdmulh_lane_s32_0(int32x2_t a, int32x2_t v) { @@ -2628,7 +2628,7 @@ int32x2_t test_vqdmulh_lane_s32_0(int32x2_t a, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]] int32x4_t test_vqdmulhq_lane_s32_0(int32x4_t a, int32x2_t v) { @@ -2639,7 +2639,7 @@ int32x4_t test_vqdmulhq_lane_s32_0(int32x4_t a, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQRDMULH_V2_I]] int16x4_t test_vqrdmulh_lane_s16_0(int16x4_t a, int16x4_t v) { @@ -2650,7 +2650,7 @@ int16x4_t test_vqrdmulh_lane_s16_0(int16x4_t a, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) // CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]] int16x8_t test_vqrdmulhq_lane_s16_0(int16x8_t a, int16x4_t v) { return vqrdmulhq_lane_s16(a, v, 0); @@ -2660,7 +2660,7 @@ int16x8_t test_vqrdmulhq_lane_s16_0(int16x8_t a, int16x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> 
zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQRDMULH_V2_I]] int32x2_t test_vqrdmulh_lane_s32_0(int32x2_t a, int32x2_t v) { @@ -2671,7 +2671,7 @@ int32x2_t test_vqrdmulh_lane_s32_0(int32x2_t a, int32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) // CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]] int32x4_t test_vqrdmulhq_lane_s32_0(int32x4_t a, int32x2_t v) { return vqrdmulhq_lane_s32(a, v, 0); @@ -2734,7 +2734,7 @@ float64x2_t test_vmulq_laneq_f64_0(float64x2_t a, float64x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> [[SHUFFLE]]) #2 +// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> [[SHUFFLE]]) // CHECK: ret <2 x float> [[VMULX2_I]] float32x2_t test_vmulx_lane_f32_0(float32x2_t a, float32x2_t v) { return vmulx_lane_f32(a, v, 0); @@ -2744,7 +2744,7 @@ float32x2_t test_vmulx_lane_f32_0(float32x2_t a, float32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> [[SHUFFLE]]) #2 +// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> [[SHUFFLE]]) // CHECK: ret <4 x float> [[VMULX2_I]] float32x4_t test_vmulxq_lane_f32_0(float32x4_t a, float32x2_t v) { return vmulxq_lane_f32(a, v, 0); @@ -2754,7 +2754,7 @@ float32x4_t test_vmulxq_lane_f32_0(float32x4_t a, float32x2_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x double> %v, <1 x double> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> [[SHUFFLE]]) #2 +// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> [[SHUFFLE]]) // CHECK: ret <2 x double> [[VMULX2_I]] float64x2_t test_vmulxq_lane_f64_0(float64x2_t a, float64x1_t v) { return vmulxq_lane_f64(a, v, 0); @@ -2764,7 +2764,7 @@ float64x2_t test_vmulxq_lane_f64_0(float64x2_t a, float64x1_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> 
zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> [[SHUFFLE]]) #2 +// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> [[SHUFFLE]]) // CHECK: ret <2 x float> [[VMULX2_I]] float32x2_t test_vmulx_laneq_f32_0(float32x2_t a, float32x4_t v) { return vmulx_laneq_f32(a, v, 0); @@ -2774,7 +2774,7 @@ float32x2_t test_vmulx_laneq_f32_0(float32x2_t a, float32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> [[SHUFFLE]]) #2 +// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> [[SHUFFLE]]) // CHECK: ret <4 x float> [[VMULX2_I]] float32x4_t test_vmulxq_laneq_f32_0(float32x4_t a, float32x4_t v) { return vmulxq_laneq_f32(a, v, 0); @@ -2784,7 +2784,7 @@ float32x4_t test_vmulxq_laneq_f32_0(float32x4_t a, float32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x double> %v, <2 x double> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> [[SHUFFLE]]) #2 +// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> [[SHUFFLE]]) // CHECK: ret <2 x double> [[VMULX2_I]] float64x2_t test_vmulxq_laneq_f64_0(float64x2_t a, float64x2_t v) { return vmulxq_laneq_f64(a, v, 0); @@ -2798,7 +2798,7 @@ float64x2_t test_vmulxq_laneq_f64_0(float64x2_t a, float64x2_t v) { // CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %b, i32 2 // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> -// CHECK: [[VMULL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) #2 +// CHECK: [[VMULL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) // CHECK: ret <4 x i32> [[VMULL5_I_I]] int32x4_t test_vmull_high_n_s16(int16x8_t a, int16_t b) { return vmull_high_n_s16(a, b); @@ -2810,7 +2810,7 @@ int32x4_t test_vmull_high_n_s16(int16x8_t a, int16_t b) { // CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %b, i32 1 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> -// CHECK: [[VMULL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) #2 +// CHECK: [[VMULL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) // CHECK: ret <2 x i64> [[VMULL3_I_I]] int64x2_t test_vmull_high_n_s32(int32x4_t a, int32_t b) { return vmull_high_n_s32(a, b); @@ -2824,7 +2824,7 @@ int64x2_t test_vmull_high_n_s32(int32x4_t a, int32_t b) { // CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %b, 
i32 2 // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> -// CHECK: [[VMULL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) #2 +// CHECK: [[VMULL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) // CHECK: ret <4 x i32> [[VMULL5_I_I]] uint32x4_t test_vmull_high_n_u16(uint16x8_t a, uint16_t b) { return vmull_high_n_u16(a, b); @@ -2836,7 +2836,7 @@ uint32x4_t test_vmull_high_n_u16(uint16x8_t a, uint16_t b) { // CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %b, i32 1 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> -// CHECK: [[VMULL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) #2 +// CHECK: [[VMULL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) // CHECK: ret <2 x i64> [[VMULL3_I_I]] uint64x2_t test_vmull_high_n_u32(uint32x4_t a, uint32_t b) { return vmull_high_n_u32(a, b); @@ -2850,7 +2850,7 @@ uint64x2_t test_vmull_high_n_u32(uint32x4_t a, uint32_t b) { // CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %b, i32 2 // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> -// CHECK: [[VQDMULL_V5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) #2 +// CHECK: [[VQDMULL_V5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) // CHECK: [[VQDMULL_V6_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V5_I_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULL_V5_I_I]] int32x4_t test_vqdmull_high_n_s16(int16x8_t a, int16_t b) { @@ -2863,7 +2863,7 @@ int32x4_t test_vqdmull_high_n_s16(int16x8_t a, int16_t b) { // CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %b, i32 1 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> -// CHECK: [[VQDMULL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) #2 +// CHECK: [[VQDMULL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) // CHECK: [[VQDMULL_V4_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V3_I_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQDMULL_V3_I_I]] int64x2_t test_vqdmull_high_n_s32(int32x4_t a, int32_t b) { @@ -2878,7 +2878,7 @@ int64x2_t test_vqdmull_high_n_s32(int32x4_t a, int32_t b) { // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) #2 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]] // CHECK: ret <4 x i32> 
[[ADD_I_I]] int32x4_t test_vmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { @@ -2891,7 +2891,7 @@ int32x4_t test_vmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) #2 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]] // CHECK: ret <2 x i64> [[ADD_I_I]] int64x2_t test_vmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { @@ -2906,7 +2906,7 @@ int64x2_t test_vmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) #2 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]] // CHECK: ret <4 x i32> [[ADD_I_I]] uint32x4_t test_vmlal_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) { @@ -2919,7 +2919,7 @@ uint32x4_t test_vmlal_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) { // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) #2 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]] // CHECK: ret <2 x i64> [[ADD_I_I]] uint64x2_t test_vmlal_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) { @@ -2935,8 +2935,8 @@ uint64x2_t test_vmlal_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) { // CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %c, i32 2 // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> -// CHECK: [[VQDMLAL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) #2 -// CHECK: [[VQDMLAL_V6_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I_I]]) #2 +// CHECK: [[VQDMLAL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) +// CHECK: [[VQDMLAL_V6_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I_I]]) // CHECK: ret <4 x i32> [[VQDMLAL_V6_I_I]] int32x4_t test_vqdmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { return vqdmlal_high_n_s16(a, b, c); @@ -2949,8 +2949,8 @@ int32x4_t test_vqdmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { // CHECK: [[VECINIT_I_I:%.*]] = 
insertelement <2 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> -// CHECK: [[VQDMLAL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) #2 -// CHECK: [[VQDMLAL_V4_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I_I]]) #2 +// CHECK: [[VQDMLAL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) +// CHECK: [[VQDMLAL_V4_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I_I]]) // CHECK: ret <2 x i64> [[VQDMLAL_V4_I_I]] int64x2_t test_vqdmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { return vqdmlal_high_n_s32(a, b, c); @@ -2964,7 +2964,7 @@ int64x2_t test_vqdmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) #2 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) // CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]] // CHECK: ret <4 x i32> [[SUB_I_I]] int32x4_t test_vmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { @@ -2977,7 +2977,7 @@ int32x4_t test_vmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) #2 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) // CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]] // CHECK: ret <2 x i64> [[SUB_I_I]] int64x2_t test_vmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { @@ -2992,7 +2992,7 @@ int64x2_t test_vmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) #2 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) // CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]] // CHECK: ret <4 x i32> [[SUB_I_I]] uint32x4_t test_vmlsl_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) { @@ -3005,7 +3005,7 @@ uint32x4_t test_vmlsl_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) { // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I_I:%.*]] 
= call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) #2 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) // CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]] // CHECK: ret <2 x i64> [[SUB_I_I]] uint64x2_t test_vmlsl_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) { @@ -3021,8 +3021,8 @@ uint64x2_t test_vmlsl_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) { // CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %c, i32 2 // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> -// CHECK: [[VQDMLAL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) #2 -// CHECK: [[VQDMLSL_V6_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I_I]]) #2 +// CHECK: [[VQDMLAL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) +// CHECK: [[VQDMLSL_V6_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I_I]]) // CHECK: ret <4 x i32> [[VQDMLSL_V6_I_I]] int32x4_t test_vqdmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { return vqdmlsl_high_n_s16(a, b, c); @@ -3035,8 +3035,8 @@ int32x4_t test_vqdmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { // CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> -// CHECK: [[VQDMLAL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) #2 -// CHECK: [[VQDMLSL_V4_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I_I]]) #2 +// CHECK: [[VQDMLAL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) +// CHECK: [[VQDMLSL_V4_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I_I]]) // CHECK: ret <2 x i64> [[VQDMLSL_V4_I_I]] int64x2_t test_vqdmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { return vqdmlsl_high_n_s32(a, b, c); @@ -3077,7 +3077,7 @@ float64x2_t test_vmulq_n_f64(float64x2_t a, float64_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> [[VECINIT1_I]], <2 x float> %a) #2 +// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> [[VECINIT1_I]], <2 x float> %a) // CHECK: ret <2 x float> [[TMP3]] float32x2_t test_vfma_n_f32(float32x2_t a, float32x2_t b, float32_t n) { return vfma_n_f32(a, b, n); @@ -3091,7 +3091,7 @@ float32x2_t test_vfma_n_f32(float32x2_t a, float32x2_t b, float32_t n) { // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <16 x i8> -// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> [[VECINIT3_I]], <4 x float> %a) #2 +// CHECK: [[TMP3:%.*]] = call <4 x 
float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> [[VECINIT3_I]], <4 x float> %a) // CHECK: ret <4 x float> [[TMP3]] float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t n) { return vfmaq_n_f32(a, b, n); @@ -3104,7 +3104,7 @@ float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t n) { // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> [[VECINIT1_I]], <2 x float> %a) #2 +// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> [[VECINIT1_I]], <2 x float> %a) // CHECK: ret <2 x float> [[TMP3]] float32x2_t test_vfms_n_f32(float32x2_t a, float32x2_t b, float32_t n) { return vfms_n_f32(a, b, n); @@ -3119,7 +3119,7 @@ float32x2_t test_vfms_n_f32(float32x2_t a, float32x2_t b, float32_t n) { // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <16 x i8> -// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> [[VECINIT3_I]], <4 x float> %a) #2 +// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> [[VECINIT3_I]], <4 x float> %a) // CHECK: ret <4 x float> [[TMP3]] float32x4_t test_vfmsq_n_f32(float32x4_t a, float32x4_t b, float32_t n) { return vfmsq_n_f32(a, b, n); @@ -3224,7 +3224,7 @@ uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) { // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) #2 +// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) // CHECK: ret <4 x i32> [[VMULL5_I]] int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) { return vmull_n_s16(a, b); @@ -3235,7 +3235,7 @@ int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) { // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) #2 +// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) // CHECK: ret <2 x i64> [[VMULL3_I]] int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) { return vmull_n_s32(a, b); @@ -3248,7 +3248,7 @@ int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) { // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) #2 +// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) // CHECK: ret <4 x i32> [[VMULL5_I]] uint32x4_t test_vmull_n_u16(uint16x4_t a, 
uint16_t b) { return vmull_n_u16(a, b); @@ -3259,7 +3259,7 @@ uint32x4_t test_vmull_n_u16(uint16x4_t a, uint16_t b) { // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) #2 +// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) // CHECK: ret <2 x i64> [[VMULL3_I]] uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) { return vmull_n_u32(a, b); @@ -3272,7 +3272,7 @@ uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) { // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VQDMULL_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) #2 +// CHECK: [[VQDMULL_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) // CHECK: [[VQDMULL_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V5_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULL_V5_I]] int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) { @@ -3284,7 +3284,7 @@ int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) { // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VQDMULL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) #2 +// CHECK: [[VQDMULL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) // CHECK: [[VQDMULL_V4_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V3_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQDMULL_V3_I]] int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) { @@ -3298,7 +3298,7 @@ int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) { // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VQDMULH_V5_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) #2 +// CHECK: [[VQDMULH_V5_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) // CHECK: [[VQDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V5_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQDMULH_V5_I]] int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) { @@ -3316,7 +3316,7 @@ int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) { // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8> -// CHECK: [[VQDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]]) #2 +// CHECK: [[VQDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]]) // CHECK: [[VQDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V9_I]] to 
<16 x i8> // CHECK: ret <8 x i16> [[VQDMULHQ_V9_I]] int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) { @@ -3328,7 +3328,7 @@ int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) { // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VQDMULH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) #2 +// CHECK: [[VQDMULH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) // CHECK: [[VQDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V3_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQDMULH_V3_I]] int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) { @@ -3342,7 +3342,7 @@ int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) { // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8> -// CHECK: [[VQDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]]) #2 +// CHECK: [[VQDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]]) // CHECK: [[VQDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V5_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULHQ_V5_I]] int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) { @@ -3356,7 +3356,7 @@ int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) { // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VQRDMULH_V5_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) #2 +// CHECK: [[VQRDMULH_V5_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) // CHECK: [[VQRDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V5_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQRDMULH_V5_I]] int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) { @@ -3374,7 +3374,7 @@ int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) { // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8> -// CHECK: [[VQRDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]]) #2 +// CHECK: [[VQRDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]]) // CHECK: [[VQRDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V9_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQRDMULHQ_V9_I]] int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) { @@ -3386,7 +3386,7 @@ int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) { // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VQRDMULH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) #2 +// CHECK: 
[[VQRDMULH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) // CHECK: [[VQRDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V3_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQRDMULH_V3_I]] int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) { @@ -3400,7 +3400,7 @@ int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) { // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8> -// CHECK: [[VQRDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]]) #2 +// CHECK: [[VQRDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]]) // CHECK: [[VQRDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V5_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQRDMULHQ_V5_I]] int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) { @@ -3514,7 +3514,7 @@ uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) { // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #2 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { @@ -3526,7 +3526,7 @@ int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #2 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[ADD_I]] int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { @@ -3540,7 +3540,7 @@ int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #2 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) { @@ -3552,7 +3552,7 @@ uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) { // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x 
i32> [[VECINIT1_I]]) #2 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[ADD_I]] uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) { @@ -3567,8 +3567,8 @@ uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) { // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #2 -// CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]]) #2 +// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) +// CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]]) // CHECK: ret <4 x i32> [[VQDMLAL_V6_I]] int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { return vqdmlal_n_s16(a, b, c); @@ -3580,8 +3580,8 @@ int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #2 -// CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]]) #2 +// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) +// CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]]) // CHECK: ret <2 x i64> [[VQDMLAL_V4_I]] int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { return vqdmlal_n_s32(a, b, c); @@ -3694,7 +3694,7 @@ uint32x4_t test_vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) { // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #2 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[SUB_I]] int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { @@ -3706,7 +3706,7 @@ int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #2 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[SUB_I]] int64x2_t 
test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { @@ -3720,7 +3720,7 @@ int64x2_t test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #2 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[SUB_I]] uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) { @@ -3732,7 +3732,7 @@ uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) { // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #2 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[SUB_I]] uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) { @@ -3747,8 +3747,8 @@ uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) { // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #2 -// CHECK: [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]]) #2 +// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) +// CHECK: [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]]) // CHECK: ret <4 x i32> [[VQDMLSL_V6_I]] int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { return vqdmlsl_n_s16(a, b, c); @@ -3760,8 +3760,8 @@ int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #2 -// CHECK: [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]]) #2 +// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) +// CHECK: [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]]) // CHECK: ret <2 x i64> [[VQDMLSL_V4_I]] int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { return vqdmlsl_n_s32(a, b, c); @@ -3844,8 +3844,8 @@ uint32x4_t test_vmlaq_laneq_u32_0(uint32x4_t a, uint32x4_t b, uint32x4_t v) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = 
bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { return vqdmlal_laneq_s16(a, b, v, 0); @@ -3856,8 +3856,8 @@ int32x4_t test_vqdmlal_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { return vqdmlal_laneq_s32(a, b, v, 0); @@ -3869,8 +3869,8 @@ int64x2_t test_vqdmlal_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { return vqdmlal_high_laneq_s16(a, b, v, 0); @@ -3882,8 +3882,8 @@ int32x4_t test_vqdmlal_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { return 
vqdmlal_high_laneq_s32(a, b, v, 0); @@ -3966,8 +3966,8 @@ uint32x4_t test_vmlsq_laneq_u32_0(uint32x4_t a, uint32x4_t b, uint32x4_t v) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { return vqdmlsl_laneq_s16(a, b, v, 0); @@ -3978,8 +3978,8 @@ int32x4_t test_vqdmlsl_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { return vqdmlsl_laneq_s32(a, b, v, 0); @@ -3991,8 +3991,8 @@ int64x2_t test_vqdmlsl_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { return vqdmlsl_high_laneq_s16(a, b, v, 0); @@ -4004,8 +4004,8 @@ int32x4_t test_vqdmlsl_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 
x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { return vqdmlsl_high_laneq_s32(a, b, v, 0); @@ -4015,7 +4015,7 @@ int64x2_t test_vqdmlsl_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQDMULH_V2_I]] int16x4_t test_vqdmulh_laneq_s16_0(int16x4_t a, int16x8_t v) { @@ -4026,7 +4026,7 @@ int16x4_t test_vqdmulh_laneq_s16_0(int16x4_t a, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]] int16x8_t test_vqdmulhq_laneq_s16_0(int16x8_t a, int16x8_t v) { @@ -4037,7 +4037,7 @@ int16x8_t test_vqdmulhq_laneq_s16_0(int16x8_t a, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQDMULH_V2_I]] int32x2_t test_vqdmulh_laneq_s32_0(int32x2_t a, int32x4_t v) { @@ -4048,7 +4048,7 @@ int32x2_t test_vqdmulh_laneq_s32_0(int32x2_t a, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]] int32x4_t test_vqdmulhq_laneq_s32_0(int32x4_t a, int32x4_t v) { @@ -4059,7 +4059,7 @@ int32x4_t test_vqdmulhq_laneq_s32_0(int32x4_t a, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 
x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQRDMULH_V2_I]] int16x4_t test_vqrdmulh_laneq_s16_0(int16x4_t a, int16x8_t v) { @@ -4070,7 +4070,7 @@ int16x4_t test_vqrdmulh_laneq_s16_0(int16x4_t a, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]] int16x8_t test_vqrdmulhq_laneq_s16_0(int16x8_t a, int16x8_t v) { @@ -4081,7 +4081,7 @@ int16x8_t test_vqrdmulhq_laneq_s16_0(int16x8_t a, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQRDMULH_V2_I]] int32x2_t test_vqrdmulh_laneq_s32_0(int32x2_t a, int32x4_t v) { @@ -4092,7 +4092,7 @@ int32x2_t test_vqrdmulh_laneq_s32_0(int32x2_t a, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]] int32x4_t test_vqrdmulhq_laneq_s32_0(int32x4_t a, int32x4_t v) { @@ -4176,8 +4176,8 @@ uint32x4_t test_vmlaq_laneq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t v) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { return vqdmlal_laneq_s16(a, b, v, 7); @@ -4188,8 
+4188,8 @@ int32x4_t test_vqdmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { return vqdmlal_laneq_s32(a, b, v, 3); @@ -4201,8 +4201,8 @@ int64x2_t test_vqdmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { return vqdmlal_high_laneq_s16(a, b, v, 7); @@ -4214,8 +4214,8 @@ int32x4_t test_vqdmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { return vqdmlal_high_laneq_s32(a, b, v, 3); @@ -4298,8 +4298,8 @@ uint32x4_t test_vmlsq_laneq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t v) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, 
<4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { return vqdmlsl_laneq_s16(a, b, v, 7); @@ -4310,8 +4310,8 @@ int32x4_t test_vqdmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { return vqdmlsl_laneq_s32(a, b, v, 3); @@ -4323,8 +4323,8 @@ int64x2_t test_vqdmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { return vqdmlsl_high_laneq_s16(a, b, v, 7); @@ -4336,8 +4336,8 @@ int32x4_t test_vqdmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) #2 -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #2 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { return vqdmlsl_high_laneq_s32(a, b, v, 3); @@ -4347,7 +4347,7 @@ int64x2_t test_vqdmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> 
[[SHUFFLE]]) // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQDMULH_V2_I]] int16x4_t test_vqdmulh_laneq_s16(int16x4_t a, int16x8_t v) { @@ -4358,7 +4358,7 @@ int16x4_t test_vqdmulh_laneq_s16(int16x4_t a, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]] int16x8_t test_vqdmulhq_laneq_s16(int16x8_t a, int16x8_t v) { @@ -4369,7 +4369,7 @@ int16x8_t test_vqdmulhq_laneq_s16(int16x8_t a, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQDMULH_V2_I]] int32x2_t test_vqdmulh_laneq_s32(int32x2_t a, int32x4_t v) { @@ -4380,7 +4380,7 @@ int32x2_t test_vqdmulh_laneq_s32(int32x2_t a, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]] int32x4_t test_vqdmulhq_laneq_s32(int32x4_t a, int32x4_t v) { @@ -4391,7 +4391,7 @@ int32x4_t test_vqdmulhq_laneq_s32(int32x4_t a, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQRDMULH_V2_I]] int16x4_t test_vqrdmulh_laneq_s16(int16x4_t a, int16x8_t v) { @@ -4402,7 +4402,7 @@ int16x4_t test_vqrdmulh_laneq_s16(int16x4_t a, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) #2 +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> 
@llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]] int16x8_t test_vqrdmulhq_laneq_s16(int16x8_t a, int16x8_t v) { @@ -4413,7 +4413,7 @@ int16x8_t test_vqrdmulhq_laneq_s16(int16x8_t a, int16x8_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQRDMULH_V2_I]] int32x2_t test_vqrdmulh_laneq_s32(int32x2_t a, int32x4_t v) { @@ -4424,7 +4424,7 @@ int32x2_t test_vqrdmulh_laneq_s32(int32x2_t a, int32x4_t v) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) #2 +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]] int32x4_t test_vqrdmulhq_laneq_s32(int32x4_t a, int32x4_t v) { diff --git a/test/CodeGen/aarch64-neon-intrinsics.c b/test/CodeGen/aarch64-neon-intrinsics.c index 54877e9d8cd97..935e762d8c639 100644 --- a/test/CodeGen/aarch64-neon-intrinsics.c +++ b/test/CodeGen/aarch64-neon-intrinsics.c @@ -379,14 +379,14 @@ float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) { } // CHECK-LABEL: @test_vmul_p8( -// CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4 +// CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %v1, <8 x i8> %v2) // CHECK: ret <8 x i8> [[VMUL_V_I]] poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) { return vmul_p8(v1, v2); } // CHECK-LABEL: @test_vmulq_p8( -// CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4 +// CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %v1, <16 x i8> %v2) // CHECK: ret <16 x i8> [[VMULQ_V_I]] poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) { return vmulq_p8(v1, v2); @@ -638,7 +638,7 @@ float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8> -// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %v2, <2 x float> %v3, <2 x float> %v1) #4 +// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %v2, <2 x float> %v3, <2 x float> %v1) // CHECK: ret <2 x float> [[TMP3]] float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { return vfma_f32(v1, v2, v3); @@ -648,7 +648,7 @@ float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> // CHECK: 
[[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8> -// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %v2, <4 x float> %v3, <4 x float> %v1) #4 +// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %v2, <4 x float> %v3, <4 x float> %v1) // CHECK: ret <4 x float> [[TMP3]] float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { return vfmaq_f32(v1, v2, v3); @@ -658,7 +658,7 @@ float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8> -// CHECK: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %v2, <2 x double> %v3, <2 x double> %v1) #4 +// CHECK: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %v2, <2 x double> %v3, <2 x double> %v1) // CHECK: ret <2 x double> [[TMP3]] float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { return vfmaq_f64(v1, v2, v3); @@ -669,7 +669,7 @@ float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8> -// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %v3, <2 x float> %v1) #4 +// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %v3, <2 x float> %v1) // CHECK: ret <2 x float> [[TMP3]] float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { return vfms_f32(v1, v2, v3); @@ -680,7 +680,7 @@ float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8> -// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %v3, <4 x float> %v1) #4 +// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %v3, <4 x float> %v1) // CHECK: ret <4 x float> [[TMP3]] float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { return vfmsq_f32(v1, v2, v3); @@ -691,7 +691,7 @@ float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8> -// CHECK: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> %v3, <2 x double> %v1) #4 +// CHECK: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> %v3, <2 x double> %v1) // CHECK: ret <2 x double> [[TMP3]] float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { return vfmsq_f64(v1, v2, v3); @@ -719,7 +719,7 @@ float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) { } // CHECK-LABEL: @test_vaba_s8( -// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) #4 +// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]] // CHECK: ret <8 x i8> [[ADD_I]] 
int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { @@ -729,7 +729,7 @@ int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { // CHECK-LABEL: @test_vaba_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> -// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v2, <4 x i16> %v3) #4 +// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v2, <4 x i16> %v3) // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]] // CHECK: ret <4 x i16> [[ADD_I]] int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { @@ -739,7 +739,7 @@ int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { // CHECK-LABEL: @test_vaba_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> -// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v2, <2 x i32> %v3) #4 +// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v2, <2 x i32> %v3) // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]] // CHECK: ret <2 x i32> [[ADD_I]] int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { @@ -747,7 +747,7 @@ int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { } // CHECK-LABEL: @test_vaba_u8( -// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) #4 +// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]] // CHECK: ret <8 x i8> [[ADD_I]] uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { @@ -757,7 +757,7 @@ uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { // CHECK-LABEL: @test_vaba_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> -// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v2, <4 x i16> %v3) #4 +// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v2, <4 x i16> %v3) // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]] // CHECK: ret <4 x i16> [[ADD_I]] uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { @@ -767,7 +767,7 @@ uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { // CHECK-LABEL: @test_vaba_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> -// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v2, <2 x i32> %v3) #4 +// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v2, <2 x i32> %v3) // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]] // CHECK: ret <2 x i32> [[ADD_I]] uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { @@ -775,7 +775,7 @@ uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { } // CHECK-LABEL: @test_vabaq_s8( -// CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) #4 +// CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]] // CHECK: ret <16 x i8> [[ADD_I]] int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) { @@ -785,7 +785,7 @@ int8x16_t 
test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) { // CHECK-LABEL: @test_vabaq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> -// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v2, <8 x i16> %v3) #4 +// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v2, <8 x i16> %v3) // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]] // CHECK: ret <8 x i16> [[ADD_I]] int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) { @@ -795,7 +795,7 @@ int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) { // CHECK-LABEL: @test_vabaq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> -// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v2, <4 x i32> %v3) #4 +// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v2, <4 x i32> %v3) // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) { @@ -803,7 +803,7 @@ int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) { } // CHECK-LABEL: @test_vabaq_u8( -// CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) #4 +// CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]] // CHECK: ret <16 x i8> [[ADD_I]] uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { @@ -813,7 +813,7 @@ uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { // CHECK-LABEL: @test_vabaq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> -// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v2, <8 x i16> %v3) #4 +// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v2, <8 x i16> %v3) // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]] // CHECK: ret <8 x i16> [[ADD_I]] uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { @@ -823,7 +823,7 @@ uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { // CHECK-LABEL: @test_vabaq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> -// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v2, <4 x i32> %v3) #4 +// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v2, <4 x i32> %v3) // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { @@ -831,7 +831,7 @@ uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { } // CHECK-LABEL: @test_vabd_s8( -// CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4 +// CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) // CHECK: ret <8 x i8> [[VABD_I]] int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) { return vabd_s8(v1, v2); @@ -840,7 +840,7 @@ int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) { // CHECK-LABEL: @test_vabd_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x 
i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> -// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v1, <4 x i16> %v2) #4 +// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v1, <4 x i16> %v2) // CHECK: ret <4 x i16> [[VABD2_I]] int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) { return vabd_s16(v1, v2); @@ -849,14 +849,14 @@ int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) { // CHECK-LABEL: @test_vabd_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> -// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v1, <2 x i32> %v2) #4 +// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v1, <2 x i32> %v2) // CHECK: ret <2 x i32> [[VABD2_I]] int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) { return vabd_s32(v1, v2); } // CHECK-LABEL: @test_vabd_u8( -// CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4 +// CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) // CHECK: ret <8 x i8> [[VABD_I]] uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) { return vabd_u8(v1, v2); @@ -865,7 +865,7 @@ uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) { // CHECK-LABEL: @test_vabd_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> -// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v1, <4 x i16> %v2) #4 +// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v1, <4 x i16> %v2) // CHECK: ret <4 x i16> [[VABD2_I]] uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) { return vabd_u16(v1, v2); @@ -874,7 +874,7 @@ uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) { // CHECK-LABEL: @test_vabd_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> -// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v1, <2 x i32> %v2) #4 +// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v1, <2 x i32> %v2) // CHECK: ret <2 x i32> [[VABD2_I]] uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) { return vabd_u32(v1, v2); @@ -883,14 +883,14 @@ uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) { // CHECK-LABEL: @test_vabd_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> -// CHECK: [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %v1, <2 x float> %v2) #4 +// CHECK: [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %v1, <2 x float> %v2) // CHECK: ret <2 x float> [[VABD2_I]] float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) { return vabd_f32(v1, v2); } // CHECK-LABEL: @test_vabdq_s8( -// CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4 +// CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) // CHECK: ret <16 x i8> [[VABD_I]] int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) { return vabdq_s8(v1, v2); @@ -899,7 +899,7 @@ int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) { // CHECK-LABEL: @test_vabdq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x 
i8> -// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v1, <8 x i16> %v2) #4 +// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v1, <8 x i16> %v2) // CHECK: ret <8 x i16> [[VABD2_I]] int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) { return vabdq_s16(v1, v2); @@ -908,14 +908,14 @@ int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) { // CHECK-LABEL: @test_vabdq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> -// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v1, <4 x i32> %v2) #4 +// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v1, <4 x i32> %v2) // CHECK: ret <4 x i32> [[VABD2_I]] int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) { return vabdq_s32(v1, v2); } // CHECK-LABEL: @test_vabdq_u8( -// CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4 +// CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) // CHECK: ret <16 x i8> [[VABD_I]] uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) { return vabdq_u8(v1, v2); @@ -924,7 +924,7 @@ uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) { // CHECK-LABEL: @test_vabdq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> -// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v1, <8 x i16> %v2) #4 +// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v1, <8 x i16> %v2) // CHECK: ret <8 x i16> [[VABD2_I]] uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) { return vabdq_u16(v1, v2); @@ -933,7 +933,7 @@ uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) { // CHECK-LABEL: @test_vabdq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> -// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v1, <4 x i32> %v2) #4 +// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v1, <4 x i32> %v2) // CHECK: ret <4 x i32> [[VABD2_I]] uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) { return vabdq_u32(v1, v2); @@ -942,7 +942,7 @@ uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) { // CHECK-LABEL: @test_vabdq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> -// CHECK: [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %v1, <4 x float> %v2) #4 +// CHECK: [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %v1, <4 x float> %v2) // CHECK: ret <4 x float> [[VABD2_I]] float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) { return vabdq_f32(v1, v2); @@ -951,7 +951,7 @@ float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) { // CHECK-LABEL: @test_vabdq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> -// CHECK: [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %v1, <2 x double> %v2) #4 +// CHECK: [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %v1, <2 x double> %v2) // CHECK: ret <2 x double> [[VABD2_I]] float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) { return vabdq_f64(v1, v2); @@ 
-1268,7 +1268,7 @@ float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) { // CHECK-LABEL: @test_vrecps_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> -// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %v1, <2 x float> %v2) #4 +// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %v1, <2 x float> %v2) // CHECK: ret <2 x float> [[VRECPS_V2_I]] float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) { return vrecps_f32(v1, v2); @@ -1277,7 +1277,7 @@ float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) { // CHECK-LABEL: @test_vrecpsq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> -// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %v1, <4 x float> %v2) #4 +// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %v1, <4 x float> %v2) // CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8> // CHECK: ret <4 x float> [[VRECPSQ_V2_I]] float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) { @@ -1287,7 +1287,7 @@ float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) { // CHECK-LABEL: @test_vrecpsq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> -// CHECK: [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %v1, <2 x double> %v2) #4 +// CHECK: [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %v1, <2 x double> %v2) // CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <2 x double> [[VRECPSQ_V2_I]] to <16 x i8> // CHECK: ret <2 x double> [[VRECPSQ_V2_I]] float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) { @@ -1297,7 +1297,7 @@ float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) { // CHECK-LABEL: @test_vrsqrts_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> -// CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %v1, <2 x float> %v2) #4 +// CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %v1, <2 x float> %v2) // CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8> // CHECK: ret <2 x float> [[VRSQRTS_V2_I]] float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) { @@ -1307,7 +1307,7 @@ float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) { // CHECK-LABEL: @test_vrsqrtsq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> -// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %v1, <4 x float> %v2) #4 +// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %v1, <4 x float> %v2) // CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8> // CHECK: ret <4 x float> [[VRSQRTSQ_V2_I]] float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) { @@ -1317,7 +1317,7 @@ float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) { // CHECK-LABEL: @test_vrsqrtsq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to 
<16 x i8> -// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %v1, <2 x double> %v2) #4 +// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %v1, <2 x double> %v2) // CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <2 x double> [[VRSQRTSQ_V2_I]] to <16 x i8> // CHECK: ret <2 x double> [[VRSQRTSQ_V2_I]] float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) { @@ -1327,7 +1327,7 @@ float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) { // CHECK-LABEL: @test_vcage_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> -// CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2) #4 +// CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2) // CHECK: ret <2 x i32> [[VCAGE_V2_I]] uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) { return vcage_f32(v1, v2); @@ -1336,7 +1336,7 @@ uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) { // CHECK-LABEL: @test_vcage_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> -// CHECK: [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %a, <1 x double> %b) #4 +// CHECK: [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %a, <1 x double> %b) // CHECK: ret <1 x i64> [[VCAGE_V2_I]] uint64x1_t test_vcage_f64(float64x1_t a, float64x1_t b) { return vcage_f64(a, b); @@ -1345,7 +1345,7 @@ uint64x1_t test_vcage_f64(float64x1_t a, float64x1_t b) { // CHECK-LABEL: @test_vcageq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> -// CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2) #4 +// CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2) // CHECK: ret <4 x i32> [[VCAGEQ_V2_I]] uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) { return vcageq_f32(v1, v2); @@ -1354,7 +1354,7 @@ uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) { // CHECK-LABEL: @test_vcageq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> -// CHECK: [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2) #4 +// CHECK: [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2) // CHECK: ret <2 x i64> [[VCAGEQ_V2_I]] uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) { return vcageq_f64(v1, v2); @@ -1363,7 +1363,7 @@ uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) { // CHECK-LABEL: @test_vcagt_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> -// CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2) #4 +// CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2) // CHECK: ret <2 x i32> [[VCAGT_V2_I]] uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) { return vcagt_f32(v1, v2); @@ -1372,7 +1372,7 @@ uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t 
v2) { // CHECK-LABEL: @test_vcagt_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> -// CHECK: [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b) #4 +// CHECK: [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b) // CHECK: ret <1 x i64> [[VCAGT_V2_I]] uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) { return vcagt_f64(a, b); @@ -1381,7 +1381,7 @@ uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) { // CHECK-LABEL: @test_vcagtq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> -// CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2) #4 +// CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2) // CHECK: ret <4 x i32> [[VCAGTQ_V2_I]] uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) { return vcagtq_f32(v1, v2); @@ -1390,7 +1390,7 @@ uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) { // CHECK-LABEL: @test_vcagtq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> -// CHECK: [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2) #4 +// CHECK: [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2) // CHECK: ret <2 x i64> [[VCAGTQ_V2_I]] uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) { return vcagtq_f64(v1, v2); @@ -1399,7 +1399,7 @@ uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) { // CHECK-LABEL: @test_vcale_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> -// CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1) #4 +// CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1) // CHECK: ret <2 x i32> [[VCALE_V2_I]] uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) { return vcale_f32(v1, v2); @@ -1409,7 +1409,7 @@ uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) { // CHECK-LABEL: @test_vcale_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> -// CHECK: [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %b, <1 x double> %a) #4 +// CHECK: [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %b, <1 x double> %a) // CHECK: ret <1 x i64> [[VCALE_V2_I]] uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) { return vcale_f64(a, b); @@ -1418,7 +1418,7 @@ uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) { // CHECK-LABEL: @test_vcaleq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> -// CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1) #4 +// CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1) // CHECK: ret <4 x i32> [[VCALEQ_V2_I]] uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) { return vcaleq_f32(v1, v2); @@ 
-1428,7 +1428,7 @@ uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) { // CHECK-LABEL: @test_vcaleq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> -// CHECK: [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1) #4 +// CHECK: [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1) // CHECK: ret <2 x i64> [[VCALEQ_V2_I]] uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) { return vcaleq_f64(v1, v2); @@ -1438,7 +1438,7 @@ uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) { // CHECK-LABEL: @test_vcalt_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> -// CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1) #4 +// CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1) // CHECK: ret <2 x i32> [[VCALT_V2_I]] uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) { return vcalt_f32(v1, v2); @@ -1448,7 +1448,7 @@ uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) { // CHECK-LABEL: @test_vcalt_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> -// CHECK: [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a) #4 +// CHECK: [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a) // CHECK: ret <1 x i64> [[VCALT_V2_I]] uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) { return vcalt_f64(a, b); @@ -1457,7 +1457,7 @@ uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) { // CHECK-LABEL: @test_vcaltq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> -// CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1) #4 +// CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1) // CHECK: ret <4 x i32> [[VCALTQ_V2_I]] uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) { return vcaltq_f32(v1, v2); @@ -1467,7 +1467,7 @@ uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) { // CHECK-LABEL: @test_vcaltq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> -// CHECK: [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1) #4 +// CHECK: [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1) // CHECK: ret <2 x i64> [[VCALTQ_V2_I]] uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) { return vcaltq_f64(v1, v2); @@ -2505,7 +2505,7 @@ uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) { } // CHECK-LABEL: @test_vhadd_s8( -// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4 +// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) // CHECK: ret <8 x i8> [[VHADD_V_I]] int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) { return vhadd_s8(v1, v2); @@ -2514,7 +2514,7 @@ int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) { // 
CHECK-LABEL: @test_vhadd_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> -// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) #4 +// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) // CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VHADD_V2_I]] int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) { @@ -2524,7 +2524,7 @@ int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) { // CHECK-LABEL: @test_vhadd_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> -// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) #4 +// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) // CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VHADD_V2_I]] int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) { @@ -2532,7 +2532,7 @@ int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) { } // CHECK-LABEL: @test_vhadd_u8( -// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4 +// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) // CHECK: ret <8 x i8> [[VHADD_V_I]] uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) { return vhadd_u8(v1, v2); @@ -2541,7 +2541,7 @@ uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) { // CHECK-LABEL: @test_vhadd_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> -// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) #4 +// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) // CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VHADD_V2_I]] uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) { @@ -2551,7 +2551,7 @@ uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) { // CHECK-LABEL: @test_vhadd_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> -// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) #4 +// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) // CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VHADD_V2_I]] uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) { @@ -2559,7 +2559,7 @@ uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) { } // CHECK-LABEL: @test_vhaddq_s8( -// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4 +// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) // CHECK: ret <16 x i8> [[VHADDQ_V_I]] int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) { return vhaddq_s8(v1, v2); @@ -2568,7 +2568,7 @@ int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) { // CHECK-LABEL: @test_vhaddq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> -// CHECK: [[VHADDQ_V2_I:%.*]] = call 
<8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) #4 +// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VHADDQ_V2_I]] int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) { @@ -2578,7 +2578,7 @@ int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) { // CHECK-LABEL: @test_vhaddq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> -// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) #4 +// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VHADDQ_V2_I]] int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) { @@ -2586,7 +2586,7 @@ int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) { } // CHECK-LABEL: @test_vhaddq_u8( -// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4 +// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) // CHECK: ret <16 x i8> [[VHADDQ_V_I]] uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) { return vhaddq_u8(v1, v2); @@ -2595,7 +2595,7 @@ uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) { // CHECK-LABEL: @test_vhaddq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> -// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) #4 +// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VHADDQ_V2_I]] uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) { @@ -2605,7 +2605,7 @@ uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) { // CHECK-LABEL: @test_vhaddq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> -// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) #4 +// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VHADDQ_V2_I]] uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) { @@ -2613,7 +2613,7 @@ uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) { } // CHECK-LABEL: @test_vhsub_s8( -// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4 +// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) // CHECK: ret <8 x i8> [[VHSUB_V_I]] int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) { return vhsub_s8(v1, v2); @@ -2622,7 +2622,7 @@ int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) { // CHECK-LABEL: @test_vhsub_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> -// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> %v1, <4 x i16> %v2) #4 +// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.shsub.v4i16(<4 x i16> %v1, <4 x i16> %v2) // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VHSUB_V2_I]] int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) { @@ -2632,7 +2632,7 @@ int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) { // CHECK-LABEL: @test_vhsub_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> -// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> %v1, <2 x i32> %v2) #4 +// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> %v1, <2 x i32> %v2) // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VHSUB_V2_I]] int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) { @@ -2640,7 +2640,7 @@ int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) { } // CHECK-LABEL: @test_vhsub_u8( -// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4 +// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) // CHECK: ret <8 x i8> [[VHSUB_V_I]] uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) { return vhsub_u8(v1, v2); @@ -2649,7 +2649,7 @@ uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) { // CHECK-LABEL: @test_vhsub_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> -// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> %v1, <4 x i16> %v2) #4 +// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> %v1, <4 x i16> %v2) // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VHSUB_V2_I]] uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) { @@ -2659,7 +2659,7 @@ uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) { // CHECK-LABEL: @test_vhsub_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> -// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> %v1, <2 x i32> %v2) #4 +// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> %v1, <2 x i32> %v2) // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VHSUB_V2_I]] uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) { @@ -2667,7 +2667,7 @@ uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) { } // CHECK-LABEL: @test_vhsubq_s8( -// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4 +// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) // CHECK: ret <16 x i8> [[VHSUBQ_V_I]] int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) { return vhsubq_s8(v1, v2); @@ -2676,7 +2676,7 @@ int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) { // CHECK-LABEL: @test_vhsubq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> -// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> %v1, <8 x i16> %v2) #4 +// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> %v1, <8 x i16> %v2) // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VHSUBQ_V2_I]] 
int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) { @@ -2686,7 +2686,7 @@ int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) { // CHECK-LABEL: @test_vhsubq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> -// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> %v1, <4 x i32> %v2) #4 +// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> %v1, <4 x i32> %v2) // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VHSUBQ_V2_I]] int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) { @@ -2694,7 +2694,7 @@ int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) { } // CHECK-LABEL: @test_vhsubq_u8( -// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4 +// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) // CHECK: ret <16 x i8> [[VHSUBQ_V_I]] uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) { return vhsubq_u8(v1, v2); @@ -2703,7 +2703,7 @@ uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) { // CHECK-LABEL: @test_vhsubq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> -// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> %v1, <8 x i16> %v2) #4 +// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> %v1, <8 x i16> %v2) // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VHSUBQ_V2_I]] uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) { @@ -2713,7 +2713,7 @@ uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) { // CHECK-LABEL: @test_vhsubq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> -// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> %v1, <4 x i32> %v2) #4 +// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> %v1, <4 x i32> %v2) // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VHSUBQ_V2_I]] uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) { @@ -2721,7 +2721,7 @@ uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) { } // CHECK-LABEL: @test_vrhadd_s8( -// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4 +// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) // CHECK: ret <8 x i8> [[VRHADD_V_I]] int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) { return vrhadd_s8(v1, v2); @@ -2730,7 +2730,7 @@ int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) { // CHECK-LABEL: @test_vrhadd_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> -// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) #4 +// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VRHADD_V2_I]] int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) { @@ -2740,7 +2740,7 @@ int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) { // 
CHECK-LABEL: @test_vrhadd_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> -// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) #4 +// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VRHADD_V2_I]] int32x2_t test_vrhadd_s32(int32x2_t v1, int32x2_t v2) { @@ -2748,7 +2748,7 @@ int32x2_t test_vrhadd_s32(int32x2_t v1, int32x2_t v2) { } // CHECK-LABEL: @test_vrhadd_u8( -// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4 +// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) // CHECK: ret <8 x i8> [[VRHADD_V_I]] uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) { return vrhadd_u8(v1, v2); @@ -2757,7 +2757,7 @@ uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) { // CHECK-LABEL: @test_vrhadd_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> -// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) #4 +// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VRHADD_V2_I]] uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) { @@ -2767,7 +2767,7 @@ uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) { // CHECK-LABEL: @test_vrhadd_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> -// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) #4 +// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VRHADD_V2_I]] uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) { @@ -2775,7 +2775,7 @@ uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) { } // CHECK-LABEL: @test_vrhaddq_s8( -// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4 +// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) // CHECK: ret <16 x i8> [[VRHADDQ_V_I]] int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) { return vrhaddq_s8(v1, v2); @@ -2784,7 +2784,7 @@ int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) { // CHECK-LABEL: @test_vrhaddq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> -// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) #4 +// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VRHADDQ_V2_I]] int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) { @@ -2794,7 +2794,7 @@ int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) { // CHECK-LABEL: @test_vrhaddq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = 
bitcast <4 x i32> %v2 to <16 x i8> -// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) #4 +// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VRHADDQ_V2_I]] int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) { @@ -2802,7 +2802,7 @@ int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) { } // CHECK-LABEL: @test_vrhaddq_u8( -// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4 +// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) // CHECK: ret <16 x i8> [[VRHADDQ_V_I]] uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) { return vrhaddq_u8(v1, v2); @@ -2811,7 +2811,7 @@ uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) { // CHECK-LABEL: @test_vrhaddq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> -// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) #4 +// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VRHADDQ_V2_I]] uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) { @@ -2821,7 +2821,7 @@ uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) { // CHECK-LABEL: @test_vrhaddq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> -// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) #4 +// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VRHADDQ_V2_I]] uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) { @@ -2829,7 +2829,7 @@ uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) { } // CHECK-LABEL: @test_vqadd_s8( -// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQADD_V_I]] int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) { return vqadd_s8(a, b); @@ -2838,7 +2838,7 @@ int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vqadd_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQADD_V2_I]] int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) { @@ -2848,7 +2848,7 @@ int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vqadd_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %a, <2 x i32> %b) 
#4 +// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQADD_V2_I]] int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) { @@ -2858,7 +2858,7 @@ int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vqadd_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQADD_V2_I]] int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) { @@ -2866,7 +2866,7 @@ int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vqadd_u8( -// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQADD_V_I]] uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) { return vqadd_u8(a, b); @@ -2875,7 +2875,7 @@ uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vqadd_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQADD_V2_I]] uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) { @@ -2885,7 +2885,7 @@ uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vqadd_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQADD_V2_I]] uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) { @@ -2895,7 +2895,7 @@ uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) { // CHECK-LABEL: @test_vqadd_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQADD_V2_I]] uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) { @@ -2903,7 +2903,7 @@ uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) { } // CHECK-LABEL: @test_vqaddq_s8( -// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQADDQ_V_I]] int8x16_t test_vqaddq_s8(int8x16_t a, 
int8x16_t b) { return vqaddq_s8(a, b); @@ -2912,7 +2912,7 @@ int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vqaddq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQADDQ_V2_I]] int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) { @@ -2922,7 +2922,7 @@ int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vqaddq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQADDQ_V2_I]] int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) { @@ -2932,7 +2932,7 @@ int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vqaddq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQADDQ_V2_I]] int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) { @@ -2940,7 +2940,7 @@ int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vqaddq_u8( -// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQADDQ_V_I]] uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) { return vqaddq_u8(a, b); @@ -2949,7 +2949,7 @@ uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) { // CHECK-LABEL: @test_vqaddq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQADDQ_V2_I]] uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) { @@ -2959,7 +2959,7 @@ uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vqaddq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQADDQ_V2_I]] uint32x4_t 
test_vqaddq_u32(uint32x4_t a, uint32x4_t b) { @@ -2969,7 +2969,7 @@ uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vqaddq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQADDQ_V2_I]] uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) { @@ -2977,7 +2977,7 @@ uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) { } // CHECK-LABEL: @test_vqsub_s8( -// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQSUB_V_I]] int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) { return vqsub_s8(a, b); @@ -2986,7 +2986,7 @@ int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vqsub_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQSUB_V2_I]] int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) { @@ -2996,7 +2996,7 @@ int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vqsub_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQSUB_V2_I]] int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) { @@ -3006,7 +3006,7 @@ int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vqsub_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQSUB_V2_I]] int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) { @@ -3014,7 +3014,7 @@ int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vqsub_u8( -// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQSUB_V_I]] uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) { return vqsub_u8(a, b); @@ -3023,7 +3023,7 @@ uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vqsub_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to 
<8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQSUB_V2_I]] uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) { @@ -3033,7 +3033,7 @@ uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vqsub_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQSUB_V2_I]] uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) { @@ -3043,7 +3043,7 @@ uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) { // CHECK-LABEL: @test_vqsub_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQSUB_V2_I]] uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) { @@ -3051,7 +3051,7 @@ uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) { } // CHECK-LABEL: @test_vqsubq_s8( -// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQSUBQ_V_I]] int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) { return vqsubq_s8(a, b); @@ -3060,7 +3060,7 @@ int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vqsubq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQSUBQ_V2_I]] int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) { @@ -3070,7 +3070,7 @@ int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vqsubq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQSUBQ_V2_I]] int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) { @@ -3080,7 +3080,7 @@ int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vqsubq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: 
[[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQSUBQ_V2_I]] int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) { @@ -3088,7 +3088,7 @@ int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vqsubq_u8( -// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQSUBQ_V_I]] uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) { return vqsubq_u8(a, b); @@ -3097,7 +3097,7 @@ uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) { // CHECK-LABEL: @test_vqsubq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQSUBQ_V2_I]] uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) { @@ -3107,7 +3107,7 @@ uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vqsubq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQSUBQ_V2_I]] uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) { @@ -3117,7 +3117,7 @@ uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vqsubq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQSUBQ_V2_I]] uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) { @@ -3125,7 +3125,7 @@ uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) { } // CHECK-LABEL: @test_vshl_s8( -// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sshl.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VSHL_V_I]] int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) { return vshl_s8(a, b); @@ -3134,7 +3134,7 @@ int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vshl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sshl.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sshl.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: 
[[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VSHL_V2_I]] int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) { @@ -3144,7 +3144,7 @@ int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vshl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sshl.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sshl.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VSHL_V2_I]] int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) { @@ -3154,7 +3154,7 @@ int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vshl_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VSHL_V2_I]] int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) { @@ -3162,7 +3162,7 @@ int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vshl_u8( -// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.ushl.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.ushl.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VSHL_V_I]] uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) { return vshl_u8(a, b); @@ -3171,7 +3171,7 @@ uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vshl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.ushl.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.ushl.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VSHL_V2_I]] uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) { @@ -3181,7 +3181,7 @@ uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vshl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ushl.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ushl.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VSHL_V2_I]] uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) { @@ -3191,7 +3191,7 @@ uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vshl_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VSHL_V2_I]] uint64x1_t test_vshl_u64(uint64x1_t a, 
int64x1_t b) { @@ -3199,7 +3199,7 @@ uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vshlq_s8( -// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VSHLQ_V_I]] int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) { return vshlq_s8(a, b); @@ -3208,7 +3208,7 @@ int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vshlq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VSHLQ_V2_I]] int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) { @@ -3218,7 +3218,7 @@ int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vshlq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VSHLQ_V2_I]] int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) { @@ -3228,7 +3228,7 @@ int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vshlq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VSHLQ_V2_I]] int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) { @@ -3236,7 +3236,7 @@ int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vshlq_u8( -// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VSHLQ_V_I]] uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) { return vshlq_u8(a, b); @@ -3245,7 +3245,7 @@ uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vshlq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VSHLQ_V2_I]] uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) { @@ -3255,7 +3255,7 @@ uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vshlq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// 
CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VSHLQ_V2_I]] uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) { @@ -3265,7 +3265,7 @@ uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vshlq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VSHLQ_V2_I]] uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) { @@ -3273,7 +3273,7 @@ uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vqshl_s8( -// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQSHL_V_I]] int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) { return vqshl_s8(a, b); @@ -3282,7 +3282,7 @@ int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vqshl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQSHL_V2_I]] int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) { @@ -3292,7 +3292,7 @@ int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vqshl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQSHL_V2_I]] int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) { @@ -3302,7 +3302,7 @@ int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vqshl_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQSHL_V2_I]] int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) { @@ -3310,7 +3310,7 @@ int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vqshl_u8( -// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> %b) // 
CHECK: ret <8 x i8> [[VQSHL_V_I]] uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) { return vqshl_u8(a, b); @@ -3319,7 +3319,7 @@ uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vqshl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQSHL_V2_I]] uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) { @@ -3329,7 +3329,7 @@ uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vqshl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQSHL_V2_I]] uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) { @@ -3339,7 +3339,7 @@ uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vqshl_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQSHL_V2_I]] uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) { @@ -3347,7 +3347,7 @@ uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vqshlq_s8( -// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQSHLQ_V_I]] int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) { return vqshlq_s8(a, b); @@ -3356,7 +3356,7 @@ int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vqshlq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQSHLQ_V2_I]] int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) { @@ -3366,7 +3366,7 @@ int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vqshlq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> 
[[VQSHLQ_V2_I]] int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) { @@ -3376,7 +3376,7 @@ int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vqshlq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQSHLQ_V2_I]] int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) { @@ -3384,7 +3384,7 @@ int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vqshlq_u8( -// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQSHLQ_V_I]] uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) { return vqshlq_u8(a, b); @@ -3393,7 +3393,7 @@ uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vqshlq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQSHLQ_V2_I]] uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) { @@ -3403,7 +3403,7 @@ uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vqshlq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQSHLQ_V2_I]] uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) { @@ -3413,7 +3413,7 @@ uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vqshlq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQSHLQ_V2_I]] uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) { @@ -3421,7 +3421,7 @@ uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vrshl_s8( -// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VRSHL_V_I]] int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) { return vrshl_s8(a, b); @@ -3430,7 +3430,7 @@ int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vrshl_s16( // CHECK: [[TMP0:%.*]] = 
bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VRSHL_V2_I]] int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) { @@ -3440,7 +3440,7 @@ int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vrshl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VRSHL_V2_I]] int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) { @@ -3450,7 +3450,7 @@ int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vrshl_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VRSHL_V2_I]] int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) { @@ -3458,7 +3458,7 @@ int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vrshl_u8( -// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VRSHL_V_I]] uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) { return vrshl_u8(a, b); @@ -3467,7 +3467,7 @@ uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vrshl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VRSHL_V2_I]] uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) { @@ -3477,7 +3477,7 @@ uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vrshl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VRSHL_V2_I]] uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) { @@ -3487,7 +3487,7 @@ uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vrshl_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x 
i8> -// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VRSHL_V2_I]] uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) { @@ -3495,7 +3495,7 @@ uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vrshlq_s8( -// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VRSHLQ_V_I]] int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) { return vrshlq_s8(a, b); @@ -3504,7 +3504,7 @@ int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vrshlq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VRSHLQ_V2_I]] int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) { @@ -3514,7 +3514,7 @@ int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vrshlq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VRSHLQ_V2_I]] int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) { @@ -3524,7 +3524,7 @@ int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vrshlq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VRSHLQ_V2_I]] int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) { @@ -3532,7 +3532,7 @@ int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vrshlq_u8( -// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VRSHLQ_V_I]] uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) { return vrshlq_u8(a, b); @@ -3541,7 +3541,7 @@ uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vrshlq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x 
i16> %a, <8 x i16> %b) // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VRSHLQ_V2_I]] uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) { @@ -3551,7 +3551,7 @@ uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vrshlq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VRSHLQ_V2_I]] uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) { @@ -3561,7 +3561,7 @@ uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vrshlq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VRSHLQ_V2_I]] uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) { @@ -3569,7 +3569,7 @@ uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vqrshl_s8( -// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQRSHL_V_I]] int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) { return vqrshl_s8(a, b); @@ -3578,7 +3578,7 @@ int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vqrshl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQRSHL_V2_I]] int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) { @@ -3588,7 +3588,7 @@ int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vqrshl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQRSHL_V2_I]] int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) { @@ -3598,7 +3598,7 @@ int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vqrshl_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %a, <1 x i64> %b) // 
CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQRSHL_V2_I]] int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) { @@ -3606,7 +3606,7 @@ int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vqrshl_u8( -// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQRSHL_V_I]] uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) { return vqrshl_u8(a, b); @@ -3615,7 +3615,7 @@ uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vqrshl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQRSHL_V2_I]] uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) { @@ -3625,7 +3625,7 @@ uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vqrshl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQRSHL_V2_I]] uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) { @@ -3635,7 +3635,7 @@ uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vqrshl_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQRSHL_V2_I]] uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) { @@ -3643,7 +3643,7 @@ uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vqrshlq_s8( -// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQRSHLQ_V_I]] int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) { return vqrshlq_s8(a, b); @@ -3652,7 +3652,7 @@ int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vqrshlq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]] int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) { @@ -3662,7 
+3662,7 @@ int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vqrshlq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]] int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) { @@ -3672,7 +3672,7 @@ int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vqrshlq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]] int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) { @@ -3680,7 +3680,7 @@ int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vqrshlq_u8( -// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQRSHLQ_V_I]] uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) { return vqrshlq_u8(a, b); @@ -3689,7 +3689,7 @@ uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vqrshlq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]] uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) { @@ -3699,7 +3699,7 @@ uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vqrshlq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]] uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) { @@ -3709,7 +3709,7 @@ uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vqrshlq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]] uint64x2_t 
test_vqrshlq_u64(uint64x2_t a, int64x2_t b) { @@ -3739,7 +3739,7 @@ poly64x2_t test_vsliq_n_p64(poly64x2_t a, poly64x2_t b) { } // CHECK-LABEL: @test_vmax_s8( -// CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VMAX_I]] int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) { return vmax_s8(a, b); @@ -3748,7 +3748,7 @@ int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vmax_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: ret <4 x i16> [[VMAX2_I]] int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) { return vmax_s16(a, b); @@ -3757,14 +3757,14 @@ int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vmax_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: ret <2 x i32> [[VMAX2_I]] int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) { return vmax_s32(a, b); } // CHECK-LABEL: @test_vmax_u8( -// CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VMAX_I]] uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) { return vmax_u8(a, b); @@ -3773,7 +3773,7 @@ uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vmax_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: ret <4 x i16> [[VMAX2_I]] uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) { return vmax_u16(a, b); @@ -3782,7 +3782,7 @@ uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vmax_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: ret <2 x i32> [[VMAX2_I]] uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) { return vmax_u32(a, b); @@ -3791,14 +3791,14 @@ uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) { // CHECK-LABEL: @test_vmax_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %a, <2 x float> %b) #4 +// CHECK: [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %a, <2 x float> %b) // CHECK: ret <2 x float> [[VMAX2_I]] float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) { return vmax_f32(a, b); } // CHECK-LABEL: 
@test_vmaxq_s8( -// CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VMAX_I]] int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) { return vmaxq_s8(a, b); @@ -3807,7 +3807,7 @@ int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vmaxq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: ret <8 x i16> [[VMAX2_I]] int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) { return vmaxq_s16(a, b); @@ -3816,14 +3816,14 @@ int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vmaxq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: ret <4 x i32> [[VMAX2_I]] int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) { return vmaxq_s32(a, b); } // CHECK-LABEL: @test_vmaxq_u8( -// CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VMAX_I]] uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) { return vmaxq_u8(a, b); @@ -3832,7 +3832,7 @@ uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) { // CHECK-LABEL: @test_vmaxq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: ret <8 x i16> [[VMAX2_I]] uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) { return vmaxq_u16(a, b); @@ -3841,7 +3841,7 @@ uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vmaxq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: ret <4 x i32> [[VMAX2_I]] uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) { return vmaxq_u32(a, b); @@ -3850,7 +3850,7 @@ uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vmaxq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %a, <4 x float> %b) #4 +// CHECK: [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %a, <4 x float> %b) // CHECK: ret <4 x float> [[VMAX2_I]] float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) { return vmaxq_f32(a, b); @@ -3859,14 +3859,14 @@ float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) { // CHECK-LABEL: 
@test_vmaxq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[VMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %a, <2 x double> %b) #4 +// CHECK: [[VMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %a, <2 x double> %b) // CHECK: ret <2 x double> [[VMAX2_I]] float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) { return vmaxq_f64(a, b); } // CHECK-LABEL: @test_vmin_s8( -// CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VMIN_I]] int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) { return vmin_s8(a, b); @@ -3875,7 +3875,7 @@ int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vmin_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: ret <4 x i16> [[VMIN2_I]] int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) { return vmin_s16(a, b); @@ -3884,14 +3884,14 @@ int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vmin_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: ret <2 x i32> [[VMIN2_I]] int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) { return vmin_s32(a, b); } // CHECK-LABEL: @test_vmin_u8( -// CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VMIN_I]] uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) { return vmin_u8(a, b); @@ -3900,7 +3900,7 @@ uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vmin_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: ret <4 x i16> [[VMIN2_I]] uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) { return vmin_u16(a, b); @@ -3909,7 +3909,7 @@ uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vmin_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: ret <2 x i32> [[VMIN2_I]] uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) { return vmin_u32(a, b); @@ -3918,14 +3918,14 @@ uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) { // CHECK-LABEL: @test_vmin_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x 
i8> -// CHECK: [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %a, <2 x float> %b) #4 +// CHECK: [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %a, <2 x float> %b) // CHECK: ret <2 x float> [[VMIN2_I]] float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) { return vmin_f32(a, b); } // CHECK-LABEL: @test_vminq_s8( -// CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VMIN_I]] int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) { return vminq_s8(a, b); @@ -3934,7 +3934,7 @@ int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vminq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: ret <8 x i16> [[VMIN2_I]] int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) { return vminq_s16(a, b); @@ -3943,14 +3943,14 @@ int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vminq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: ret <4 x i32> [[VMIN2_I]] int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) { return vminq_s32(a, b); } // CHECK-LABEL: @test_vminq_u8( -// CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VMIN_I]] uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) { return vminq_u8(a, b); @@ -3959,7 +3959,7 @@ uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) { // CHECK-LABEL: @test_vminq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: ret <8 x i16> [[VMIN2_I]] uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) { return vminq_u16(a, b); @@ -3968,7 +3968,7 @@ uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vminq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: ret <4 x i32> [[VMIN2_I]] uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) { return vminq_u32(a, b); @@ -3977,7 +3977,7 @@ uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vminq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x 
float> %a, <4 x float> %b) #4 +// CHECK: [[VMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %a, <4 x float> %b) // CHECK: ret <4 x float> [[VMIN2_I]] float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) { return vminq_f32(a, b); @@ -3986,7 +3986,7 @@ float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) { // CHECK-LABEL: @test_vminq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[VMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %a, <2 x double> %b) #4 +// CHECK: [[VMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %a, <2 x double> %b) // CHECK: ret <2 x double> [[VMIN2_I]] float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) { return vminq_f64(a, b); @@ -3995,7 +3995,7 @@ float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) { // CHECK-LABEL: @test_vmaxnm_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> %a, <2 x float> %b) #4 +// CHECK: [[VMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> %a, <2 x float> %b) // CHECK: ret <2 x float> [[VMAXNM2_I]] float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) { return vmaxnm_f32(a, b); @@ -4004,7 +4004,7 @@ float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) { // CHECK-LABEL: @test_vmaxnmq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %b) #4 +// CHECK: [[VMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %b) // CHECK: ret <4 x float> [[VMAXNM2_I]] float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) { return vmaxnmq_f32(a, b); @@ -4013,7 +4013,7 @@ float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) { // CHECK-LABEL: @test_vmaxnmq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[VMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %b) #4 +// CHECK: [[VMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %b) // CHECK: ret <2 x double> [[VMAXNM2_I]] float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) { return vmaxnmq_f64(a, b); @@ -4022,7 +4022,7 @@ float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) { // CHECK-LABEL: @test_vminnm_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> %a, <2 x float> %b) #4 +// CHECK: [[VMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> %a, <2 x float> %b) // CHECK: ret <2 x float> [[VMINNM2_I]] float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) { return vminnm_f32(a, b); @@ -4031,7 +4031,7 @@ float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) { // CHECK-LABEL: @test_vminnmq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> %a, <4 
x float> %b) #4 +// CHECK: [[VMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %b) // CHECK: ret <4 x float> [[VMINNM2_I]] float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) { return vminnmq_f32(a, b); @@ -4040,14 +4040,14 @@ float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) { // CHECK-LABEL: @test_vminnmq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[VMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %b) #4 +// CHECK: [[VMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %b) // CHECK: ret <2 x double> [[VMINNM2_I]] float64x2_t test_vminnmq_f64(float64x2_t a, float64x2_t b) { return vminnmq_f64(a, b); } // CHECK-LABEL: @test_vpmax_s8( -// CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VPMAX_I]] int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) { return vpmax_s8(a, b); @@ -4056,7 +4056,7 @@ int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vpmax_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: ret <4 x i16> [[VPMAX2_I]] int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) { return vpmax_s16(a, b); @@ -4065,14 +4065,14 @@ int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vpmax_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: ret <2 x i32> [[VPMAX2_I]] int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) { return vpmax_s32(a, b); } // CHECK-LABEL: @test_vpmax_u8( -// CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VPMAX_I]] uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) { return vpmax_u8(a, b); @@ -4081,7 +4081,7 @@ uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vpmax_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: ret <4 x i16> [[VPMAX2_I]] uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) { return vpmax_u16(a, b); @@ -4090,7 +4090,7 @@ uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vpmax_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: 
[[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: ret <2 x i32> [[VPMAX2_I]] uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) { return vpmax_u32(a, b); @@ -4099,14 +4099,14 @@ uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) { // CHECK-LABEL: @test_vpmax_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %a, <2 x float> %b) #4 +// CHECK: [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %a, <2 x float> %b) // CHECK: ret <2 x float> [[VPMAX2_I]] float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) { return vpmax_f32(a, b); } // CHECK-LABEL: @test_vpmaxq_s8( -// CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VPMAX_I]] int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) { return vpmaxq_s8(a, b); @@ -4115,7 +4115,7 @@ int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vpmaxq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: ret <8 x i16> [[VPMAX2_I]] int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) { return vpmaxq_s16(a, b); @@ -4124,14 +4124,14 @@ int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vpmaxq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: ret <4 x i32> [[VPMAX2_I]] int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) { return vpmaxq_s32(a, b); } // CHECK-LABEL: @test_vpmaxq_u8( -// CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VPMAX_I]] uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) { return vpmaxq_u8(a, b); @@ -4140,7 +4140,7 @@ uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) { // CHECK-LABEL: @test_vpmaxq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: ret <8 x i16> [[VPMAX2_I]] uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) { return vpmaxq_u16(a, b); @@ -4149,7 +4149,7 @@ uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vpmaxq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VPMAX2_I:%.*]] = call <4 x 
i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: ret <4 x i32> [[VPMAX2_I]] uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) { return vpmaxq_u32(a, b); @@ -4158,7 +4158,7 @@ uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vpmaxq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %a, <4 x float> %b) #4 +// CHECK: [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %a, <4 x float> %b) // CHECK: ret <4 x float> [[VPMAX2_I]] float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) { return vpmaxq_f32(a, b); @@ -4167,14 +4167,14 @@ float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) { // CHECK-LABEL: @test_vpmaxq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %a, <2 x double> %b) #4 +// CHECK: [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %a, <2 x double> %b) // CHECK: ret <2 x double> [[VPMAX2_I]] float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) { return vpmaxq_f64(a, b); } // CHECK-LABEL: @test_vpmin_s8( -// CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VPMIN_I]] int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) { return vpmin_s8(a, b); @@ -4183,7 +4183,7 @@ int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vpmin_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: ret <4 x i16> [[VPMIN2_I]] int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) { return vpmin_s16(a, b); @@ -4192,14 +4192,14 @@ int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vpmin_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: ret <2 x i32> [[VPMIN2_I]] int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) { return vpmin_s32(a, b); } // CHECK-LABEL: @test_vpmin_u8( -// CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VPMIN_I]] uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) { return vpmin_u8(a, b); @@ -4208,7 +4208,7 @@ uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vpmin_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x 
i16> %a, <4 x i16> %b) // CHECK: ret <4 x i16> [[VPMIN2_I]] uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) { return vpmin_u16(a, b); @@ -4217,7 +4217,7 @@ uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vpmin_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: ret <2 x i32> [[VPMIN2_I]] uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) { return vpmin_u32(a, b); @@ -4226,14 +4226,14 @@ uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) { // CHECK-LABEL: @test_vpmin_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %a, <2 x float> %b) #4 +// CHECK: [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %a, <2 x float> %b) // CHECK: ret <2 x float> [[VPMIN2_I]] float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) { return vpmin_f32(a, b); } // CHECK-LABEL: @test_vpminq_s8( -// CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VPMIN_I]] int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) { return vpminq_s8(a, b); @@ -4242,7 +4242,7 @@ int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vpminq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: ret <8 x i16> [[VPMIN2_I]] int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) { return vpminq_s16(a, b); @@ -4251,14 +4251,14 @@ int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vpminq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: ret <4 x i32> [[VPMIN2_I]] int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) { return vpminq_s32(a, b); } // CHECK-LABEL: @test_vpminq_u8( -// CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VPMIN_I]] uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) { return vpminq_u8(a, b); @@ -4267,7 +4267,7 @@ uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) { // CHECK-LABEL: @test_vpminq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: 
ret <8 x i16> [[VPMIN2_I]] uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) { return vpminq_u16(a, b); @@ -4276,7 +4276,7 @@ uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vpminq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: ret <4 x i32> [[VPMIN2_I]] uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) { return vpminq_u32(a, b); @@ -4285,7 +4285,7 @@ uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vpminq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VPMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %a, <4 x float> %b) #4 +// CHECK: [[VPMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %a, <4 x float> %b) // CHECK: ret <4 x float> [[VPMIN2_I]] float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) { return vpminq_f32(a, b); @@ -4294,7 +4294,7 @@ float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) { // CHECK-LABEL: @test_vpminq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %a, <2 x double> %b) #4 +// CHECK: [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %a, <2 x double> %b) // CHECK: ret <2 x double> [[VPMIN2_I]] float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) { return vpminq_f64(a, b); @@ -4303,7 +4303,7 @@ float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) { // CHECK-LABEL: @test_vpmaxnm_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VPMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %a, <2 x float> %b) #4 +// CHECK: [[VPMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %a, <2 x float> %b) // CHECK: ret <2 x float> [[VPMAXNM2_I]] float32x2_t test_vpmaxnm_f32(float32x2_t a, float32x2_t b) { return vpmaxnm_f32(a, b); @@ -4312,7 +4312,7 @@ float32x2_t test_vpmaxnm_f32(float32x2_t a, float32x2_t b) { // CHECK-LABEL: @test_vpmaxnmq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VPMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %a, <4 x float> %b) #4 +// CHECK: [[VPMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %a, <4 x float> %b) // CHECK: ret <4 x float> [[VPMAXNM2_I]] float32x4_t test_vpmaxnmq_f32(float32x4_t a, float32x4_t b) { return vpmaxnmq_f32(a, b); @@ -4321,7 +4321,7 @@ float32x4_t test_vpmaxnmq_f32(float32x4_t a, float32x4_t b) { // CHECK-LABEL: @test_vpmaxnmq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[VPMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %a, <2 x double> %b) #4 +// CHECK: [[VPMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %a, <2 x double> %b) // CHECK: ret <2 x double> 
[[VPMAXNM2_I]] float64x2_t test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) { return vpmaxnmq_f64(a, b); @@ -4330,7 +4330,7 @@ float64x2_t test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) { // CHECK-LABEL: @test_vpminnm_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %a, <2 x float> %b) #4 +// CHECK: [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %a, <2 x float> %b) // CHECK: ret <2 x float> [[VPMINNM2_I]] float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) { return vpminnm_f32(a, b); @@ -4339,7 +4339,7 @@ float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) { // CHECK-LABEL: @test_vpminnmq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %a, <4 x float> %b) #4 +// CHECK: [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %a, <4 x float> %b) // CHECK: ret <4 x float> [[VPMINNM2_I]] float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) { return vpminnmq_f32(a, b); @@ -4348,14 +4348,14 @@ float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) { // CHECK-LABEL: @test_vpminnmq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %a, <2 x double> %b) #4 +// CHECK: [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %a, <2 x double> %b) // CHECK: ret <2 x double> [[VPMINNM2_I]] float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) { return vpminnmq_f64(a, b); } // CHECK-LABEL: @test_vpadd_s8( -// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VPADD_V_I]] int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) { return vpadd_s8(a, b); @@ -4364,7 +4364,7 @@ int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vpadd_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VPADD_V2_I]] int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) { @@ -4374,7 +4374,7 @@ int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vpadd_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VPADD_V2_I]] int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) { @@ -4382,7 +4382,7 @@ int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) { } // 
CHECK-LABEL: @test_vpadd_u8( -// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VPADD_V_I]] uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) { return vpadd_u8(a, b); @@ -4391,7 +4391,7 @@ uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vpadd_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VPADD_V2_I]] uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) { @@ -4401,7 +4401,7 @@ uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vpadd_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VPADD_V2_I]] uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) { @@ -4411,7 +4411,7 @@ uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) { // CHECK-LABEL: @test_vpadd_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %a, <2 x float> %b) #4 +// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %a, <2 x float> %b) // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8> // CHECK: ret <2 x float> [[VPADD_V2_I]] float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) { @@ -4419,7 +4419,7 @@ float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) { } // CHECK-LABEL: @test_vpaddq_s8( -// CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VPADDQ_V_I]] int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) { return vpaddq_s8(a, b); @@ -4428,7 +4428,7 @@ int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vpaddq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VPADDQ_V2_I]] int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) { @@ -4438,7 +4438,7 @@ int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vpaddq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> 
@llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VPADDQ_V2_I]] int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) { @@ -4446,7 +4446,7 @@ int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) { } // CHECK-LABEL: @test_vpaddq_u8( -// CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VPADDQ_V_I]] uint8x16_t test_vpaddq_u8(uint8x16_t a, uint8x16_t b) { return vpaddq_u8(a, b); @@ -4455,7 +4455,7 @@ uint8x16_t test_vpaddq_u8(uint8x16_t a, uint8x16_t b) { // CHECK-LABEL: @test_vpaddq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VPADDQ_V2_I]] uint16x8_t test_vpaddq_u16(uint16x8_t a, uint16x8_t b) { @@ -4465,7 +4465,7 @@ uint16x8_t test_vpaddq_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vpaddq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VPADDQ_V2_I]] uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) { @@ -4475,7 +4475,7 @@ uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vpaddq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %a, <4 x float> %b) #4 +// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %a, <4 x float> %b) // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x float> [[VPADDQ_V2_I]] float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) { @@ -4485,7 +4485,7 @@ float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) { // CHECK-LABEL: @test_vpaddq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %a, <2 x double> %b) #4 +// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %a, <2 x double> %b) // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8> // CHECK: ret <2 x double> [[VPADDQ_V2_I]] float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) { @@ -4495,7 +4495,7 @@ float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) { // CHECK-LABEL: @test_vqdmulh_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to 
<8 x i8> -// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQDMULH_V2_I]] int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) { @@ -4505,7 +4505,7 @@ int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vqdmulh_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQDMULH_V2_I]] int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) { @@ -4515,7 +4515,7 @@ int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vqdmulhq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]] int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) { @@ -4525,7 +4525,7 @@ int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vqdmulhq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]] int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) { @@ -4535,7 +4535,7 @@ int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vqrdmulh_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQRDMULH_V2_I]] int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) { @@ -4545,7 +4545,7 @@ int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vqrdmulh_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQRDMULH_V2_I]] int32x2_t 
test_vqrdmulh_s32(int32x2_t a, int32x2_t b) { @@ -4555,7 +4555,7 @@ int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vqrdmulhq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]] int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) { @@ -4565,7 +4565,7 @@ int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vqrdmulhq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]] int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) { @@ -4575,7 +4575,7 @@ int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vmulx_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %b) #4 +// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %b) // CHECK: ret <2 x float> [[VMULX2_I]] float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) { return vmulx_f32(a, b); @@ -4584,7 +4584,7 @@ float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) { // CHECK-LABEL: @test_vmulxq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %b) #4 +// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %b) // CHECK: ret <4 x float> [[VMULX2_I]] float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) { return vmulxq_f32(a, b); @@ -4593,7 +4593,7 @@ float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) { // CHECK-LABEL: @test_vmulxq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %b) #4 +// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %b) // CHECK: ret <2 x double> [[VMULX2_I]] float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) { return vmulxq_f64(a, b); @@ -7203,7 +7203,7 @@ uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { // CHECK-LABEL: @test_vraddhn_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x 
i16> %b) // CHECK: ret <8 x i8> [[VRADDHN_V2_I]] int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) { return vraddhn_s16(a, b); @@ -7212,7 +7212,7 @@ int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vraddhn_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VRADDHN_V2_I]] int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) { @@ -7222,7 +7222,7 @@ int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vraddhn_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VRADDHN_V2_I]] int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) { @@ -7232,7 +7232,7 @@ int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) { // CHECK-LABEL: @test_vraddhn_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) // CHECK: ret <8 x i8> [[VRADDHN_V2_I]] uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) { return vraddhn_u16(a, b); @@ -7241,7 +7241,7 @@ uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vraddhn_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VRADDHN_V2_I]] uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) { @@ -7251,7 +7251,7 @@ uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vraddhn_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VRADDHN_V2_I]] uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) { @@ -7261,7 +7261,7 @@ uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) { // CHECK-LABEL: @test_vraddhn_high_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x 
i16> %b) #4 +// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { @@ -7271,7 +7271,7 @@ int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vraddhn_high_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] @@ -7282,7 +7282,7 @@ int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vraddhn_high_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] @@ -7293,7 +7293,7 @@ int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { // CHECK-LABEL: @test_vraddhn_high_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { @@ -7303,7 +7303,7 @@ uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vraddhn_high_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] @@ -7314,7 +7314,7 @@ uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vraddhn_high_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] @@ -7463,7 +7463,7 @@ uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { // CHECK-LABEL: @test_vrsubhn_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) // CHECK: ret <8 x i8> [[VRSUBHN_V2_I]] int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) { return vrsubhn_s16(a, b); @@ -7472,7 +7472,7 @@ int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vrsubhn_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VRSUBHN_V2_I]] int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) { @@ -7482,7 +7482,7 @@ int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vrsubhn_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VRSUBHN_V2_I]] int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) { @@ -7492,7 +7492,7 @@ int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) { // CHECK-LABEL: @test_vrsubhn_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) // CHECK: ret <8 x i8> [[VRSUBHN_V2_I]] uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) { return vrsubhn_u16(a, b); @@ -7501,7 +7501,7 @@ uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vrsubhn_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VRSUBHN_V2_I]] uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) { @@ -7511,7 +7511,7 @@ uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vrsubhn_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 
x i8> -// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VRSUBHN_V2_I]] uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) { @@ -7521,7 +7521,7 @@ uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) { // CHECK-LABEL: @test_vrsubhn_high_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { @@ -7531,7 +7531,7 @@ int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vrsubhn_high_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] @@ -7542,7 +7542,7 @@ int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vrsubhn_high_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] @@ -7553,7 +7553,7 @@ int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { // CHECK-LABEL: @test_vrsubhn_high_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { @@ -7563,7 +7563,7 @@ uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vrsubhn_high_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: 
[[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] @@ -7574,7 +7574,7 @@ uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vrsubhn_high_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] @@ -7583,7 +7583,7 @@ uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { } // CHECK-LABEL: @test_vabdl_s8( -// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16> // CHECK: ret <8 x i16> [[VMOVL_I_I]] int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) { @@ -7593,7 +7593,7 @@ int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vabdl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32> // CHECK: ret <4 x i32> [[VMOVL_I_I]] @@ -7604,7 +7604,7 @@ int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vabdl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64> // CHECK: ret <2 x i64> [[VMOVL_I_I]] @@ -7613,7 +7613,7 @@ int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: @test_vabdl_u8( -// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16> // CHECK: ret <8 x i16> [[VMOVL_I_I]] uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) { @@ -7623,7 +7623,7 @@ uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vabdl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// 
CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32> // CHECK: ret <4 x i32> [[VMOVL_I_I]] @@ -7634,7 +7634,7 @@ uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vabdl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64> // CHECK: ret <2 x i64> [[VMOVL_I_I]] @@ -7643,7 +7643,7 @@ uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) { } // CHECK-LABEL: @test_vabal_s8( -// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c) #4 +// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c) // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16> // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]] // CHECK: ret <8 x i16> [[ADD_I]] @@ -7654,7 +7654,7 @@ int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) { // CHECK-LABEL: @test_vabal_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c) #4 +// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c) // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32> // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]] @@ -7666,7 +7666,7 @@ int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { // CHECK-LABEL: @test_vabal_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c) #4 +// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c) // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64> // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]] @@ -7676,7 +7676,7 @@ int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { } // CHECK-LABEL: @test_vabal_u8( -// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c) #4 +// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c) // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16> // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]] // CHECK: ret <8 x i16> [[ADD_I]] @@ -7687,7 +7687,7 @@ uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { // CHECK-LABEL: @test_vabal_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c) #4 +// CHECK: 
[[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c) // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32> // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]] @@ -7699,7 +7699,7 @@ uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { // CHECK-LABEL: @test_vabal_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c) #4 +// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c) // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64> // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]] @@ -7711,7 +7711,7 @@ uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { // CHECK-LABEL: @test_vabdl_high_s8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> -// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16> // CHECK: ret <8 x i16> [[VMOVL_I_I_I]] int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) { @@ -7723,7 +7723,7 @@ int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) { // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32> // CHECK: ret <4 x i32> [[VMOVL_I_I_I]] @@ -7736,7 +7736,7 @@ int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) { // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64> // CHECK: ret <2 x i64> [[VMOVL_I_I_I]] @@ -7747,7 +7747,7 @@ int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vabdl_high_u8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> -// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16> // CHECK: ret <8 x i16> [[VMOVL_I_I_I]] uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) { @@ -7759,7 +7759,7 @@ uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) { // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32> // CHECK: ret <4 x i32> [[VMOVL_I_I_I]] @@ -7772,7 +7772,7 @@ uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64> // CHECK: ret <2 x i64> [[VMOVL_I_I_I]] @@ -7783,7 +7783,7 @@ uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vabal_high_s8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> -// CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16> // CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]] // CHECK: ret <8 x i16> [[ADD_I_I]] @@ -7796,7 +7796,7 @@ int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) { // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32> // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]] @@ -7810,7 +7810,7 @@ int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { 
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64> // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]] @@ -7822,7 +7822,7 @@ int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { // CHECK-LABEL: @test_vabal_high_u8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> -// CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16> // CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]] // CHECK: ret <8 x i16> [[ADD_I_I]] @@ -7835,7 +7835,7 @@ uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) { // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32> // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]] @@ -7849,7 +7849,7 @@ uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) { // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64> // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]] @@ -7859,7 +7859,7 @@ uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) { } // CHECK-LABEL: @test_vmull_s8( -// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i16> [[VMULL_I]] int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) { return vmull_s8(a, b); @@ -7868,7 +7868,7 @@ 
int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vmull_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b) // CHECK: ret <4 x i32> [[VMULL2_I]] int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) { return vmull_s16(a, b); @@ -7877,14 +7877,14 @@ int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vmull_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b) // CHECK: ret <2 x i64> [[VMULL2_I]] int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) { return vmull_s32(a, b); } // CHECK-LABEL: @test_vmull_u8( -// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i16> [[VMULL_I]] uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) { return vmull_u8(a, b); @@ -7893,7 +7893,7 @@ uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vmull_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b) // CHECK: ret <4 x i32> [[VMULL2_I]] uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) { return vmull_u16(a, b); @@ -7902,7 +7902,7 @@ uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vmull_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b) // CHECK: ret <2 x i64> [[VMULL2_I]] uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) { return vmull_u32(a, b); @@ -7911,7 +7911,7 @@ uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) { // CHECK-LABEL: @test_vmull_high_s8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> -// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) // CHECK: ret <8 x i16> [[VMULL_I_I]] int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) { return vmull_high_s8(a, b); @@ -7922,7 +7922,7 @@ int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) { // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x 
i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) // CHECK: ret <4 x i32> [[VMULL2_I_I]] int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) { return vmull_high_s16(a, b); @@ -7933,7 +7933,7 @@ int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) { // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) // CHECK: ret <2 x i64> [[VMULL2_I_I]] int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) { return vmull_high_s32(a, b); @@ -7942,7 +7942,7 @@ int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vmull_high_u8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> -// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) // CHECK: ret <8 x i16> [[VMULL_I_I]] uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) { return vmull_high_u8(a, b); @@ -7953,7 +7953,7 @@ uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) { // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) // CHECK: ret <4 x i32> [[VMULL2_I_I]] uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) { return vmull_high_u16(a, b); @@ -7964,14 +7964,14 @@ uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) // CHECK: ret <2 x i64> [[VMULL2_I_I]] uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) { return vmull_high_u32(a, b); } // CHECK-LABEL: @test_vmlal_s8( -// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) #4 +// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]] // CHECK: ret <8 x i16> [[ADD_I]] int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) { @@ 
-7981,7 +7981,7 @@ int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) { // CHECK-LABEL: @test_vmlal_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c) // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { @@ -7991,7 +7991,7 @@ int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { // CHECK-LABEL: @test_vmlal_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c) // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[ADD_I]] int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { @@ -7999,7 +7999,7 @@ int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { } // CHECK-LABEL: @test_vmlal_u8( -// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) #4 +// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]] // CHECK: ret <8 x i16> [[ADD_I]] uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { @@ -8009,7 +8009,7 @@ uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { // CHECK-LABEL: @test_vmlal_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c) // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { @@ -8019,7 +8019,7 @@ uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { // CHECK-LABEL: @test_vmlal_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c) // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[ADD_I]] uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { @@ -8029,7 +8029,7 @@ uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { // CHECK-LABEL: @test_vmlal_high_s8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> -// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) // CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, 
[[VMULL_I_I_I]] // CHECK: ret <8 x i16> [[ADD_I_I]] int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) { @@ -8041,7 +8041,7 @@ int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) { // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]] // CHECK: ret <4 x i32> [[ADD_I_I]] int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { @@ -8053,7 +8053,7 @@ int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]] // CHECK: ret <2 x i64> [[ADD_I_I]] int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { @@ -8063,7 +8063,7 @@ int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { // CHECK-LABEL: @test_vmlal_high_u8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> -// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) // CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]] // CHECK: ret <8 x i16> [[ADD_I_I]] uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) { @@ -8075,7 +8075,7 @@ uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) { // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]] // CHECK: ret <4 x i32> [[ADD_I_I]] uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) { @@ -8087,7 +8087,7 @@ uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) { // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> 
@llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]] // CHECK: ret <2 x i64> [[ADD_I_I]] uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) { @@ -8095,7 +8095,7 @@ uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) { } // CHECK-LABEL: @test_vmlsl_s8( -// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) #4 +// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]] // CHECK: ret <8 x i16> [[SUB_I]] int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) { @@ -8105,7 +8105,7 @@ int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) { // CHECK-LABEL: @test_vmlsl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c) // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[SUB_I]] int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { @@ -8115,7 +8115,7 @@ int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { // CHECK-LABEL: @test_vmlsl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c) // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[SUB_I]] int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { @@ -8123,7 +8123,7 @@ int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { } // CHECK-LABEL: @test_vmlsl_u8( -// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) #4 +// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]] // CHECK: ret <8 x i16> [[SUB_I]] uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { @@ -8133,7 +8133,7 @@ uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { // CHECK-LABEL: @test_vmlsl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c) // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[SUB_I]] uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { @@ -8143,7 +8143,7 @@ uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { // CHECK-LABEL: @test_vmlsl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 
x i32> %c) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c) // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[SUB_I]] uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { @@ -8153,7 +8153,7 @@ uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { // CHECK-LABEL: @test_vmlsl_high_s8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> -// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) // CHECK: [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]] // CHECK: ret <8 x i16> [[SUB_I_I]] int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) { @@ -8165,7 +8165,7 @@ int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) { // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) // CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]] // CHECK: ret <4 x i32> [[SUB_I_I]] int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { @@ -8177,7 +8177,7 @@ int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) // CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]] // CHECK: ret <2 x i64> [[SUB_I_I]] int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { @@ -8187,7 +8187,7 @@ int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { // CHECK-LABEL: @test_vmlsl_high_u8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> -// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) // CHECK: [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]] // CHECK: ret <8 x i16> [[SUB_I_I]] uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) { @@ -8199,7 +8199,7 @@ uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) { // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: 
[[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) // CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]] // CHECK: ret <4 x i32> [[SUB_I_I]] uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) { @@ -8211,7 +8211,7 @@ uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) { // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) // CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]] // CHECK: ret <2 x i64> [[SUB_I_I]] uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) { @@ -8221,7 +8221,7 @@ uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) { // CHECK-LABEL: @test_vqdmull_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULL_V2_I]] int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) { @@ -8231,7 +8231,7 @@ int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vqdmull_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQDMULL_V2_I]] int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) { @@ -8242,8 +8242,8 @@ int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) #4 -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vqdmlal_s16(a, b, c); @@ -8253,8 +8253,8 @@ int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x 
i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) #4 -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vqdmlal_s32(a, b, c); @@ -8264,8 +8264,8 @@ int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) #4 -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vqdmlsl_s16(a, b, c); @@ -8275,8 +8275,8 @@ int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) #4 -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vqdmlsl_s32(a, b, c); @@ -8287,7 +8287,7 @@ int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) // CHECK: [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULL_V2_I_I]] int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) { @@ -8299,7 +8299,7 @@ int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) { // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// 
CHECK: [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) // CHECK: [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQDMULL_V2_I_I]] int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) { @@ -8312,8 +8312,8 @@ int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 -// CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]]) #4 +// CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) +// CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]]) // CHECK: ret <4 x i32> [[VQDMLAL_V3_I_I]] int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { return vqdmlal_high_s16(a, b, c); @@ -8325,8 +8325,8 @@ int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 -// CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]]) #4 +// CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) +// CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]]) // CHECK: ret <2 x i64> [[VQDMLAL_V3_I_I]] int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { return vqdmlal_high_s32(a, b, c); @@ -8338,8 +8338,8 @@ int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) #4 -// CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]]) #4 +// CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) +// CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]]) // CHECK: ret <4 x i32> [[VQDMLSL_V3_I_I]] int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { return vqdmlsl_high_s16(a, b, c); @@ -8351,15 +8351,15 @@ int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: 
[[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> -// CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) #4 -// CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]]) #4 +// CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) +// CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]]) // CHECK: ret <2 x i64> [[VQDMLSL_V3_I_I]] int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { return vqdmlsl_high_s32(a, b, c); } // CHECK-LABEL: @test_vmull_p8( -// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i16> [[VMULL_I]] poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) { return vmull_p8(a, b); @@ -8368,7 +8368,7 @@ poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) { // CHECK-LABEL: @test_vmull_high_p8( // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> -// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 +// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) // CHECK: ret <8 x i16> [[VMULL_I_I]] poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) { return vmull_high_p8(a, b); @@ -8405,7 +8405,7 @@ uint64_t test_vsubd_u64(uint64_t a, uint64_t b) { // CHECK-LABEL: @test_vqaddb_s8( // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0 -// CHECK: [[VQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4 +// CHECK: [[VQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_S8_I]], i64 0 // CHECK: ret i8 [[TMP2]] int8_t test_vqaddb_s8(int8_t a, int8_t b) { @@ -8415,7 +8415,7 @@ int8_t test_vqaddb_s8(int8_t a, int8_t b) { // CHECK-LABEL: @test_vqaddh_s16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 -// CHECK: [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 +// CHECK: [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_S16_I]], i64 0 // CHECK: ret i16 [[TMP2]] int16_t test_vqaddh_s16(int16_t a, int16_t b) { @@ -8423,14 +8423,14 @@ int16_t test_vqaddh_s16(int16_t a, int16_t b) { } // CHECK-LABEL: @test_vqadds_s32( -// CHECK: [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %b) #4 +// CHECK: [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %b) // CHECK: ret i32 [[VQADDS_S32_I]] int32_t test_vqadds_s32(int32_t a, int32_t b) { return vqadds_s32(a, b); } // CHECK-LABEL: @test_vqaddd_s64( -// CHECK: [[VQADDD_S64_I:%.*]] = call i64 
@llvm.aarch64.neon.sqadd.i64(i64 %a, i64 %b) #4 +// CHECK: [[VQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 %b) // CHECK: ret i64 [[VQADDD_S64_I]] int64_t test_vqaddd_s64(int64_t a, int64_t b) { return vqaddd_s64(a, b); @@ -8439,7 +8439,7 @@ int64_t test_vqaddd_s64(int64_t a, int64_t b) { // CHECK-LABEL: @test_vqaddb_u8( // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0 -// CHECK: [[VQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4 +// CHECK: [[VQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_U8_I]], i64 0 // CHECK: ret i8 [[TMP2]] uint8_t test_vqaddb_u8(uint8_t a, uint8_t b) { @@ -8449,7 +8449,7 @@ uint8_t test_vqaddb_u8(uint8_t a, uint8_t b) { // CHECK-LABEL: @test_vqaddh_u16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 -// CHECK: [[VQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 +// CHECK: [[VQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_U16_I]], i64 0 // CHECK: ret i16 [[TMP2]] uint16_t test_vqaddh_u16(uint16_t a, uint16_t b) { @@ -8457,14 +8457,14 @@ uint16_t test_vqaddh_u16(uint16_t a, uint16_t b) { } // CHECK-LABEL: @test_vqadds_u32( -// CHECK: [[VQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqadd.i32(i32 %a, i32 %b) #4 +// CHECK: [[VQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqadd.i32(i32 %a, i32 %b) // CHECK: ret i32 [[VQADDS_U32_I]] uint32_t test_vqadds_u32(uint32_t a, uint32_t b) { return vqadds_u32(a, b); } // CHECK-LABEL: @test_vqaddd_u64( -// CHECK: [[VQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqadd.i64(i64 %a, i64 %b) #4 +// CHECK: [[VQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqadd.i64(i64 %a, i64 %b) // CHECK: ret i64 [[VQADDD_U64_I]] uint64_t test_vqaddd_u64(uint64_t a, uint64_t b) { return vqaddd_u64(a, b); @@ -8473,7 +8473,7 @@ uint64_t test_vqaddd_u64(uint64_t a, uint64_t b) { // CHECK-LABEL: @test_vqsubb_s8( // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0 -// CHECK: [[VQSUBB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4 +// CHECK: [[VQSUBB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_S8_I]], i64 0 // CHECK: ret i8 [[TMP2]] int8_t test_vqsubb_s8(int8_t a, int8_t b) { @@ -8483,7 +8483,7 @@ int8_t test_vqsubb_s8(int8_t a, int8_t b) { // CHECK-LABEL: @test_vqsubh_s16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 -// CHECK: [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 +// CHECK: [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_S16_I]], i64 0 // CHECK: ret i16 [[TMP2]] int16_t test_vqsubh_s16(int16_t a, int16_t b) { @@ -8491,14 +8491,14 @@ int16_t test_vqsubh_s16(int16_t a, int16_t b) { } // 
CHECK-LABEL: @test_vqsubs_s32( -// CHECK: [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 %b) #4 +// CHECK: [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 %b) // CHECK: ret i32 [[VQSUBS_S32_I]] int32_t test_vqsubs_s32(int32_t a, int32_t b) { return vqsubs_s32(a, b); } // CHECK-LABEL: @test_vqsubd_s64( -// CHECK: [[VQSUBD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 %b) #4 +// CHECK: [[VQSUBD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 %b) // CHECK: ret i64 [[VQSUBD_S64_I]] int64_t test_vqsubd_s64(int64_t a, int64_t b) { return vqsubd_s64(a, b); @@ -8507,7 +8507,7 @@ int64_t test_vqsubd_s64(int64_t a, int64_t b) { // CHECK-LABEL: @test_vqsubb_u8( // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0 -// CHECK: [[VQSUBB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4 +// CHECK: [[VQSUBB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_U8_I]], i64 0 // CHECK: ret i8 [[TMP2]] uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) { @@ -8517,7 +8517,7 @@ uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) { // CHECK-LABEL: @test_vqsubh_u16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 -// CHECK: [[VQSUBH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 +// CHECK: [[VQSUBH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_U16_I]], i64 0 // CHECK: ret i16 [[TMP2]] uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) { @@ -8525,28 +8525,28 @@ uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) { } // CHECK-LABEL: @test_vqsubs_u32( -// CHECK: [[VQSUBS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqsub.i32(i32 %a, i32 %b) #4 +// CHECK: [[VQSUBS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqsub.i32(i32 %a, i32 %b) // CHECK: ret i32 [[VQSUBS_U32_I]] uint32_t test_vqsubs_u32(uint32_t a, uint32_t b) { return vqsubs_u32(a, b); } // CHECK-LABEL: @test_vqsubd_u64( -// CHECK: [[VQSUBD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqsub.i64(i64 %a, i64 %b) #4 +// CHECK: [[VQSUBD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqsub.i64(i64 %a, i64 %b) // CHECK: ret i64 [[VQSUBD_U64_I]] uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) { return vqsubd_u64(a, b); } // CHECK-LABEL: @test_vshld_s64( -// CHECK: [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 %a, i64 %b) #4 +// CHECK: [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 %a, i64 %b) // CHECK: ret i64 [[VSHLD_S64_I]] int64_t test_vshld_s64(int64_t a, int64_t b) { return vshld_s64(a, b); } // CHECK-LABEL: @test_vshld_u64( -// CHECK: [[VSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 %a, i64 %b) #4 +// CHECK: [[VSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 %a, i64 %b) // CHECK: ret i64 [[VSHLD_U64_I]] uint64_t test_vshld_u64(uint64_t a, uint64_t b) { return vshld_u64(a, b); @@ -8555,7 +8555,7 @@ uint64_t test_vshld_u64(uint64_t a, uint64_t b) { // CHECK-LABEL: @test_vqshlb_s8( // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0 -// CHECK: [[VQSHLB_S8_I:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4 +// CHECK: [[VQSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_S8_I]], i64 0 // CHECK: ret i8 [[TMP2]] int8_t test_vqshlb_s8(int8_t a, int8_t b) { @@ -8565,7 +8565,7 @@ int8_t test_vqshlb_s8(int8_t a, int8_t b) { // CHECK-LABEL: @test_vqshlh_s16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 -// CHECK: [[VQSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 +// CHECK: [[VQSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_S16_I]], i64 0 // CHECK: ret i16 [[TMP2]] int16_t test_vqshlh_s16(int16_t a, int16_t b) { @@ -8573,14 +8573,14 @@ int16_t test_vqshlh_s16(int16_t a, int16_t b) { } // CHECK-LABEL: @test_vqshls_s32( -// CHECK: [[VQSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 %b) #4 +// CHECK: [[VQSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 %b) // CHECK: ret i32 [[VQSHLS_S32_I]] int32_t test_vqshls_s32(int32_t a, int32_t b) { return vqshls_s32(a, b); } // CHECK-LABEL: @test_vqshld_s64( -// CHECK: [[VQSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 %b) #4 +// CHECK: [[VQSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 %b) // CHECK: ret i64 [[VQSHLD_S64_I]] int64_t test_vqshld_s64(int64_t a, int64_t b) { return vqshld_s64(a, b); @@ -8589,7 +8589,7 @@ int64_t test_vqshld_s64(int64_t a, int64_t b) { // CHECK-LABEL: @test_vqshlb_u8( // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0 -// CHECK: [[VQSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4 +// CHECK: [[VQSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_U8_I]], i64 0 // CHECK: ret i8 [[TMP2]] uint8_t test_vqshlb_u8(uint8_t a, uint8_t b) { @@ -8599,7 +8599,7 @@ uint8_t test_vqshlb_u8(uint8_t a, uint8_t b) { // CHECK-LABEL: @test_vqshlh_u16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 -// CHECK: [[VQSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 +// CHECK: [[VQSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_U16_I]], i64 0 // CHECK: ret i16 [[TMP2]] uint16_t test_vqshlh_u16(uint16_t a, uint16_t b) { @@ -8607,28 +8607,28 @@ uint16_t test_vqshlh_u16(uint16_t a, uint16_t b) { } // CHECK-LABEL: @test_vqshls_u32( -// CHECK: [[VQSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 %b) #4 +// CHECK: [[VQSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 %b) // CHECK: ret i32 [[VQSHLS_U32_I]] uint32_t test_vqshls_u32(uint32_t a, uint32_t b) { return vqshls_u32(a, b); } // CHECK-LABEL: @test_vqshld_u64( -// CHECK: [[VQSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 %b) #4 +// CHECK: [[VQSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 %b) // 
CHECK: ret i64 [[VQSHLD_U64_I]] uint64_t test_vqshld_u64(uint64_t a, uint64_t b) { return vqshld_u64(a, b); } // CHECK-LABEL: @test_vrshld_s64( -// CHECK: [[VRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 %b) #4 +// CHECK: [[VRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 %b) // CHECK: ret i64 [[VRSHLD_S64_I]] int64_t test_vrshld_s64(int64_t a, int64_t b) { return vrshld_s64(a, b); } // CHECK-LABEL: @test_vrshld_u64( -// CHECK: [[VRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 %b) #4 +// CHECK: [[VRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 %b) // CHECK: ret i64 [[VRSHLD_U64_I]] uint64_t test_vrshld_u64(uint64_t a, uint64_t b) { return vrshld_u64(a, b); @@ -8637,7 +8637,7 @@ uint64_t test_vrshld_u64(uint64_t a, uint64_t b) { // CHECK-LABEL: @test_vqrshlb_s8( // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0 -// CHECK: [[VQRSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4 +// CHECK: [[VQRSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_S8_I]], i64 0 // CHECK: ret i8 [[TMP2]] int8_t test_vqrshlb_s8(int8_t a, int8_t b) { @@ -8647,7 +8647,7 @@ int8_t test_vqrshlb_s8(int8_t a, int8_t b) { // CHECK-LABEL: @test_vqrshlh_s16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 -// CHECK: [[VQRSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 +// CHECK: [[VQRSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_S16_I]], i64 0 // CHECK: ret i16 [[TMP2]] int16_t test_vqrshlh_s16(int16_t a, int16_t b) { @@ -8655,14 +8655,14 @@ int16_t test_vqrshlh_s16(int16_t a, int16_t b) { } // CHECK-LABEL: @test_vqrshls_s32( -// CHECK: [[VQRSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrshl.i32(i32 %a, i32 %b) #4 +// CHECK: [[VQRSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrshl.i32(i32 %a, i32 %b) // CHECK: ret i32 [[VQRSHLS_S32_I]] int32_t test_vqrshls_s32(int32_t a, int32_t b) { return vqrshls_s32(a, b); } // CHECK-LABEL: @test_vqrshld_s64( -// CHECK: [[VQRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %a, i64 %b) #4 +// CHECK: [[VQRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %a, i64 %b) // CHECK: ret i64 [[VQRSHLD_S64_I]] int64_t test_vqrshld_s64(int64_t a, int64_t b) { return vqrshld_s64(a, b); @@ -8671,7 +8671,7 @@ int64_t test_vqrshld_s64(int64_t a, int64_t b) { // CHECK-LABEL: @test_vqrshlb_u8( // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0 -// CHECK: [[VQRSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4 +// CHECK: [[VQRSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_U8_I]], i64 0 // CHECK: ret i8 [[TMP2]] uint8_t test_vqrshlb_u8(uint8_t a, uint8_t b) { @@ -8681,7 +8681,7 @@ uint8_t test_vqrshlb_u8(uint8_t a, uint8_t b) { // CHECK-LABEL: @test_vqrshlh_u16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 
0 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 -// CHECK: [[VQRSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 +// CHECK: [[VQRSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_U16_I]], i64 0 // CHECK: ret i16 [[TMP2]] uint16_t test_vqrshlh_u16(uint16_t a, uint16_t b) { @@ -8689,14 +8689,14 @@ uint16_t test_vqrshlh_u16(uint16_t a, uint16_t b) { } // CHECK-LABEL: @test_vqrshls_u32( -// CHECK: [[VQRSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %a, i32 %b) #4 +// CHECK: [[VQRSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %a, i32 %b) // CHECK: ret i32 [[VQRSHLS_U32_I]] uint32_t test_vqrshls_u32(uint32_t a, uint32_t b) { return vqrshls_u32(a, b); } // CHECK-LABEL: @test_vqrshld_u64( -// CHECK: [[VQRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %a, i64 %b) #4 +// CHECK: [[VQRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %a, i64 %b) // CHECK: ret i64 [[VQRSHLD_U64_I]] uint64_t test_vqrshld_u64(uint64_t a, uint64_t b) { return vqrshld_u64(a, b); @@ -8704,7 +8704,7 @@ uint64_t test_vqrshld_u64(uint64_t a, uint64_t b) { // CHECK-LABEL: @test_vpaddd_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a) #4 +// CHECK: [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a) // CHECK: ret i64 [[VPADDD_S64_I]] int64_t test_vpaddd_s64(int64x2_t a) { return vpaddd_s64(a); @@ -8732,7 +8732,7 @@ float64_t test_vpaddd_f64(float64x2_t a) { // CHECK-LABEL: @test_vpmaxnms_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a) #4 +// CHECK: [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a) // CHECK: ret float [[VPMAXNMS_F32_I]] float32_t test_vpmaxnms_f32(float32x2_t a) { return vpmaxnms_f32(a); @@ -8740,7 +8740,7 @@ float32_t test_vpmaxnms_f32(float32x2_t a) { // CHECK-LABEL: @test_vpmaxnmqd_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a) #4 +// CHECK: [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a) // CHECK: ret double [[VPMAXNMQD_F64_I]] float64_t test_vpmaxnmqd_f64(float64x2_t a) { return vpmaxnmqd_f64(a); @@ -8748,7 +8748,7 @@ float64_t test_vpmaxnmqd_f64(float64x2_t a) { // CHECK-LABEL: @test_vpmaxs_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) #4 +// CHECK: [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) // CHECK: ret float [[VPMAXS_F32_I]] float32_t test_vpmaxs_f32(float32x2_t a) { return vpmaxs_f32(a); @@ -8756,7 +8756,7 @@ float32_t test_vpmaxs_f32(float32x2_t a) { // CHECK-LABEL: @test_vpmaxqd_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a) #4 +// CHECK: [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a) // CHECK: ret double [[VPMAXQD_F64_I]] float64_t test_vpmaxqd_f64(float64x2_t a) { return vpmaxqd_f64(a); 
@@ -8764,7 +8764,7 @@ float64_t test_vpmaxqd_f64(float64x2_t a) { // CHECK-LABEL: @test_vpminnms_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a) #4 +// CHECK: [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a) // CHECK: ret float [[VPMINNMS_F32_I]] float32_t test_vpminnms_f32(float32x2_t a) { return vpminnms_f32(a); @@ -8772,7 +8772,7 @@ float32_t test_vpminnms_f32(float32x2_t a) { // CHECK-LABEL: @test_vpminnmqd_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a) #4 +// CHECK: [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a) // CHECK: ret double [[VPMINNMQD_F64_I]] float64_t test_vpminnmqd_f64(float64x2_t a) { return vpminnmqd_f64(a); @@ -8780,7 +8780,7 @@ float64_t test_vpminnmqd_f64(float64x2_t a) { // CHECK-LABEL: @test_vpmins_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a) #4 +// CHECK: [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a) // CHECK: ret float [[VPMINS_F32_I]] float32_t test_vpmins_f32(float32x2_t a) { return vpmins_f32(a); @@ -8788,7 +8788,7 @@ float32_t test_vpmins_f32(float32x2_t a) { // CHECK-LABEL: @test_vpminqd_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a) #4 +// CHECK: [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a) // CHECK: ret double [[VPMINQD_F64_I]] float64_t test_vpminqd_f64(float64x2_t a) { return vpminqd_f64(a); @@ -8797,7 +8797,7 @@ float64_t test_vpminqd_f64(float64x2_t a) { // CHECK-LABEL: @test_vqdmulhh_s16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 -// CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 +// CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0 // CHECK: ret i16 [[TMP2]] int16_t test_vqdmulhh_s16(int16_t a, int16_t b) { @@ -8805,7 +8805,7 @@ int16_t test_vqdmulhh_s16(int16_t a, int16_t b) { } // CHECK-LABEL: @test_vqdmulhs_s32( -// CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 %b) #4 +// CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 %b) // CHECK: ret i32 [[VQDMULHS_S32_I]] int32_t test_vqdmulhs_s32(int32_t a, int32_t b) { return vqdmulhs_s32(a, b); @@ -8814,7 +8814,7 @@ int32_t test_vqdmulhs_s32(int32_t a, int32_t b) { // CHECK-LABEL: @test_vqrdmulhh_s16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 -// CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 +// CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0 // CHECK: ret i16 [[TMP2]] 
int16_t test_vqrdmulhh_s16(int16_t a, int16_t b) { @@ -8822,21 +8822,21 @@ int16_t test_vqrdmulhh_s16(int16_t a, int16_t b) { } // CHECK-LABEL: @test_vqrdmulhs_s32( -// CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 %b) #4 +// CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 %b) // CHECK: ret i32 [[VQRDMULHS_S32_I]] int32_t test_vqrdmulhs_s32(int32_t a, int32_t b) { return vqrdmulhs_s32(a, b); } // CHECK-LABEL: @test_vmulxs_f32( -// CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) #4 +// CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) // CHECK: ret float [[VMULXS_F32_I]] float32_t test_vmulxs_f32(float32_t a, float32_t b) { return vmulxs_f32(a, b); } // CHECK-LABEL: @test_vmulxd_f64( -// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) #4 +// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) // CHECK: ret double [[VMULXD_F64_I]] float64_t test_vmulxd_f64(float64_t a, float64_t b) { return vmulxd_f64(a, b); @@ -8845,35 +8845,35 @@ float64_t test_vmulxd_f64(float64_t a, float64_t b) { // CHECK-LABEL: @test_vmulx_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> -// CHECK: [[VMULX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmulx.v1f64(<1 x double> %a, <1 x double> %b) #4 +// CHECK: [[VMULX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmulx.v1f64(<1 x double> %a, <1 x double> %b) // CHECK: ret <1 x double> [[VMULX2_I]] float64x1_t test_vmulx_f64(float64x1_t a, float64x1_t b) { return vmulx_f64(a, b); } // CHECK-LABEL: @test_vrecpss_f32( -// CHECK: [[VRECPS_I:%.*]] = call float @llvm.aarch64.neon.frecps.f32(float %a, float %b) #4 +// CHECK: [[VRECPS_I:%.*]] = call float @llvm.aarch64.neon.frecps.f32(float %a, float %b) // CHECK: ret float [[VRECPS_I]] float32_t test_vrecpss_f32(float32_t a, float32_t b) { return vrecpss_f32(a, b); } // CHECK-LABEL: @test_vrecpsd_f64( -// CHECK: [[VRECPS_I:%.*]] = call double @llvm.aarch64.neon.frecps.f64(double %a, double %b) #4 +// CHECK: [[VRECPS_I:%.*]] = call double @llvm.aarch64.neon.frecps.f64(double %a, double %b) // CHECK: ret double [[VRECPS_I]] float64_t test_vrecpsd_f64(float64_t a, float64_t b) { return vrecpsd_f64(a, b); } // CHECK-LABEL: @test_vrsqrtss_f32( -// CHECK: [[VRSQRTSS_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b) #4 +// CHECK: [[VRSQRTSS_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b) // CHECK: ret float [[VRSQRTSS_F32_I]] float32_t test_vrsqrtss_f32(float32_t a, float32_t b) { return vrsqrtss_f32(a, b); } // CHECK-LABEL: @test_vrsqrtsd_f64( -// CHECK: [[VRSQRTSD_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b) #4 +// CHECK: [[VRSQRTSD_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b) // CHECK: ret double [[VRSQRTSD_F64_I]] float64_t test_vrsqrtsd_f64(float64_t a, float64_t b) { return vrsqrtsd_f64(a, b); @@ -8908,28 +8908,28 @@ float64_t test_vcvtd_f64_u64(uint64_t a) { } // CHECK-LABEL: @test_vrecpes_f32( -// CHECK: [[VRECPES_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpe.f32(float %a) #4 +// CHECK: [[VRECPES_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpe.f32(float %a) // CHECK: ret float [[VRECPES_F32_I]] float32_t test_vrecpes_f32(float32_t a) { return vrecpes_f32(a); } // CHECK-LABEL: 
@test_vrecped_f64( -// CHECK: [[VRECPED_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpe.f64(double %a) #4 +// CHECK: [[VRECPED_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpe.f64(double %a) // CHECK: ret double [[VRECPED_F64_I]] float64_t test_vrecped_f64(float64_t a) { return vrecped_f64(a); } // CHECK-LABEL: @test_vrecpxs_f32( -// CHECK: [[VRECPXS_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpx.f32(float %a) #4 +// CHECK: [[VRECPXS_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpx.f32(float %a) // CHECK: ret float [[VRECPXS_F32_I]] float32_t test_vrecpxs_f32(float32_t a) { return vrecpxs_f32(a); } // CHECK-LABEL: @test_vrecpxd_f64( -// CHECK: [[VRECPXD_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpx.f64(double %a) #4 +// CHECK: [[VRECPXD_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpx.f64(double %a) // CHECK: ret double [[VRECPXD_F64_I]] float64_t test_vrecpxd_f64(float64_t a) { return vrecpxd_f64(a); @@ -8937,7 +8937,7 @@ float64_t test_vrecpxd_f64(float64_t a) { // CHECK-LABEL: @test_vrsqrte_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %a) #4 +// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %a) // CHECK: ret <2 x i32> [[VRSQRTE_V1_I]] uint32x2_t test_vrsqrte_u32(uint32x2_t a) { return vrsqrte_u32(a); @@ -8945,21 +8945,21 @@ uint32x2_t test_vrsqrte_u32(uint32x2_t a) { // CHECK-LABEL: @test_vrsqrteq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %a) #4 +// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %a) // CHECK: ret <4 x i32> [[VRSQRTEQ_V1_I]] uint32x4_t test_vrsqrteq_u32(uint32x4_t a) { return vrsqrteq_u32(a); } // CHECK-LABEL: @test_vrsqrtes_f32( -// CHECK: [[VRSQRTES_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrte.f32(float %a) #4 +// CHECK: [[VRSQRTES_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrte.f32(float %a) // CHECK: ret float [[VRSQRTES_F32_I]] float32_t test_vrsqrtes_f32(float32_t a) { return vrsqrtes_f32(a); } // CHECK-LABEL: @test_vrsqrted_f64( -// CHECK: [[VRSQRTED_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrte.f64(double %a) #4 +// CHECK: [[VRSQRTED_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrte.f64(double %a) // CHECK: ret double [[VRSQRTED_F64_I]] float64_t test_vrsqrted_f64(float64_t a) { return vrsqrted_f64(a); @@ -17146,7 +17146,7 @@ uint64_t test_vtstd_u64(uint64_t a, uint64_t b) { } // CHECK-LABEL: @test_vabsd_s64( -// CHECK: [[VABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.abs.i64(i64 %a) #4 +// CHECK: [[VABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.abs.i64(i64 %a) // CHECK: ret i64 [[VABSD_S64_I]] int64_t test_vabsd_s64(int64_t a) { return (int64_t)vabsd_s64(a); @@ -17154,7 +17154,7 @@ int64_t test_vabsd_s64(int64_t a) { // CHECK-LABEL: @test_vqabsb_s8( // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 -// CHECK: [[VQABSB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> [[TMP0]]) #4 +// CHECK: [[VQABSB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> [[TMP0]]) // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQABSB_S8_I]], i64 0 // CHECK: ret i8 [[TMP1]] int8_t test_vqabsb_s8(int8_t a) { @@ -17163,7 +17163,7 @@ int8_t test_vqabsb_s8(int8_t a) { // CHECK-LABEL: @test_vqabsh_s16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 -// CHECK: 
[[VQABSH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[TMP0]]) #4 +// CHECK: [[VQABSH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[TMP0]]) // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQABSH_S16_I]], i64 0 // CHECK: ret i16 [[TMP1]] int16_t test_vqabsh_s16(int16_t a) { @@ -17171,14 +17171,14 @@ int16_t test_vqabsh_s16(int16_t a) { } // CHECK-LABEL: @test_vqabss_s32( -// CHECK: [[VQABSS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) #4 +// CHECK: [[VQABSS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) // CHECK: ret i32 [[VQABSS_S32_I]] int32_t test_vqabss_s32(int32_t a) { return (int32_t)vqabss_s32(a); } // CHECK-LABEL: @test_vqabsd_s64( -// CHECK: [[VQABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqabs.i64(i64 %a) #4 +// CHECK: [[VQABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqabs.i64(i64 %a) // CHECK: ret i64 [[VQABSD_S64_I]] int64_t test_vqabsd_s64(int64_t a) { return (int64_t)vqabsd_s64(a); @@ -17193,7 +17193,7 @@ int64_t test_vnegd_s64(int64_t a) { // CHECK-LABEL: @test_vqnegb_s8( // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 -// CHECK: [[VQNEGB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> [[TMP0]]) #4 +// CHECK: [[VQNEGB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> [[TMP0]]) // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQNEGB_S8_I]], i64 0 // CHECK: ret i8 [[TMP1]] int8_t test_vqnegb_s8(int8_t a) { @@ -17202,7 +17202,7 @@ int8_t test_vqnegb_s8(int8_t a) { // CHECK-LABEL: @test_vqnegh_s16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 -// CHECK: [[VQNEGH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[TMP0]]) #4 +// CHECK: [[VQNEGH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[TMP0]]) // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQNEGH_S16_I]], i64 0 // CHECK: ret i16 [[TMP1]] int16_t test_vqnegh_s16(int16_t a) { @@ -17210,14 +17210,14 @@ int16_t test_vqnegh_s16(int16_t a) { } // CHECK-LABEL: @test_vqnegs_s32( -// CHECK: [[VQNEGS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqneg.i32(i32 %a) #4 +// CHECK: [[VQNEGS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqneg.i32(i32 %a) // CHECK: ret i32 [[VQNEGS_S32_I]] int32_t test_vqnegs_s32(int32_t a) { return (int32_t)vqnegs_s32(a); } // CHECK-LABEL: @test_vqnegd_s64( -// CHECK: [[VQNEGD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqneg.i64(i64 %a) #4 +// CHECK: [[VQNEGD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqneg.i64(i64 %a) // CHECK: ret i64 [[VQNEGD_S64_I]] int64_t test_vqnegd_s64(int64_t a) { return (int64_t)vqnegd_s64(a); @@ -17226,7 +17226,7 @@ int64_t test_vqnegd_s64(int64_t a) { // CHECK-LABEL: @test_vuqaddb_s8( // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0 -// CHECK: [[VUQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4 +// CHECK: [[VUQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VUQADDB_S8_I]], i64 0 // CHECK: ret i8 [[TMP2]] int8_t test_vuqaddb_s8(int8_t a, int8_t b) { @@ -17236,7 +17236,7 @@ int8_t test_vuqaddb_s8(int8_t a, int8_t b) { // CHECK-LABEL: @test_vuqaddh_s16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 -// CHECK: [[VUQADDH_S16_I:%.*]] = call <4 
x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 +// CHECK: [[VUQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VUQADDH_S16_I]], i64 0 // CHECK: ret i16 [[TMP2]] int16_t test_vuqaddh_s16(int16_t a, int16_t b) { @@ -17244,14 +17244,14 @@ int16_t test_vuqaddh_s16(int16_t a, int16_t b) { } // CHECK-LABEL: @test_vuqadds_s32( -// CHECK: [[VUQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %a, i32 %b) #4 +// CHECK: [[VUQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %a, i32 %b) // CHECK: ret i32 [[VUQADDS_S32_I]] int32_t test_vuqadds_s32(int32_t a, int32_t b) { return (int32_t)vuqadds_s32(a, b); } // CHECK-LABEL: @test_vuqaddd_s64( -// CHECK: [[VUQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %a, i64 %b) #4 +// CHECK: [[VUQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %a, i64 %b) // CHECK: ret i64 [[VUQADDD_S64_I]] int64_t test_vuqaddd_s64(int64_t a, int64_t b) { return (int64_t)vuqaddd_s64(a, b); @@ -17260,7 +17260,7 @@ int64_t test_vuqaddd_s64(int64_t a, int64_t b) { // CHECK-LABEL: @test_vsqaddb_u8( // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0 -// CHECK: [[VSQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4 +// CHECK: [[VSQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VSQADDB_U8_I]], i64 0 // CHECK: ret i8 [[TMP2]] uint8_t test_vsqaddb_u8(uint8_t a, uint8_t b) { @@ -17270,7 +17270,7 @@ uint8_t test_vsqaddb_u8(uint8_t a, uint8_t b) { // CHECK-LABEL: @test_vsqaddh_u16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 -// CHECK: [[VSQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 +// CHECK: [[VSQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VSQADDH_U16_I]], i64 0 // CHECK: ret i16 [[TMP2]] uint16_t test_vsqaddh_u16(uint16_t a, uint16_t b) { @@ -17278,14 +17278,14 @@ uint16_t test_vsqaddh_u16(uint16_t a, uint16_t b) { } // CHECK-LABEL: @test_vsqadds_u32( -// CHECK: [[VSQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %a, i32 %b) #4 +// CHECK: [[VSQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %a, i32 %b) // CHECK: ret i32 [[VSQADDS_U32_I]] uint32_t test_vsqadds_u32(uint32_t a, uint32_t b) { return (uint32_t)vsqadds_u32(a, b); } // CHECK-LABEL: @test_vsqaddd_u64( -// CHECK: [[VSQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %a, i64 %b) #4 +// CHECK: [[VSQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %a, i64 %b) // CHECK: ret i64 [[VSQADDD_U64_I]] uint64_t test_vsqaddd_u64(uint64_t a, uint64_t b) { return (uint64_t)vsqaddd_u64(a, b); @@ -17294,17 +17294,17 @@ uint64_t test_vsqaddd_u64(uint64_t a, uint64_t b) { // CHECK-LABEL: @test_vqdmlalh_s16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0 -// CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 +// CHECK: 
[[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) // CHECK: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0 -// CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0_I]]) #4 +// CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0_I]]) // CHECK: ret i32 [[VQDMLXL1_I]] int32_t test_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) { return (int32_t)vqdmlalh_s16(a, b, c); } // CHECK-LABEL: @test_vqdmlals_s32( -// CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c) #4 -// CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL_I]]) #4 +// CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c) +// CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL_I]]) // CHECK: ret i64 [[VQDMLXL1_I]] int64_t test_vqdmlals_s32(int64_t a, int32_t b, int32_t c) { return (int64_t)vqdmlals_s32(a, b, c); @@ -17313,17 +17313,17 @@ int64_t test_vqdmlals_s32(int64_t a, int32_t b, int32_t c) { // CHECK-LABEL: @test_vqdmlslh_s16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0 -// CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 +// CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) // CHECK: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0 -// CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0_I]]) #4 +// CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0_I]]) // CHECK: ret i32 [[VQDMLXL1_I]] int32_t test_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) { return (int32_t)vqdmlslh_s16(a, b, c); } // CHECK-LABEL: @test_vqdmlsls_s32( -// CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c) #4 -// CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL_I]]) #4 +// CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c) +// CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL_I]]) // CHECK: ret i64 [[VQDMLXL1_I]] int64_t test_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) { return (int64_t)vqdmlsls_s32(a, b, c); @@ -17332,7 +17332,7 @@ int64_t test_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) { // CHECK-LABEL: @test_vqdmullh_s16( // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 -// CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 +// CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) // CHECK: [[TMP2:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0 // CHECK: ret i32 [[TMP2]] int32_t test_vqdmullh_s16(int16_t a, int16_t b) { @@ -17340,7 +17340,7 @@ int32_t test_vqdmullh_s16(int16_t a, int16_t b) { } // CHECK-LABEL: @test_vqdmulls_s32( -// CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 %b) #4 +// CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 %b) // CHECK: ret i64 [[VQDMULLS_S32_I]] int64_t 
test_vqdmulls_s32(int32_t a, int32_t b) { return (int64_t)vqdmulls_s32(a, b); @@ -17348,7 +17348,7 @@ int64_t test_vqdmulls_s32(int32_t a, int32_t b) { // CHECK-LABEL: @test_vqmovunh_s16( // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0 -// CHECK: [[VQMOVUNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[TMP0]]) #4 +// CHECK: [[VQMOVUNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[TMP0]]) // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVUNH_S16_I]], i64 0 // CHECK: ret i8 [[TMP1]] int8_t test_vqmovunh_s16(int16_t a) { @@ -17357,7 +17357,7 @@ int8_t test_vqmovunh_s16(int16_t a) { // CHECK-LABEL: @test_vqmovuns_s32( // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0 -// CHECK: [[VQMOVUNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[TMP0]]) #4 +// CHECK: [[VQMOVUNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[TMP0]]) // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVUNS_S32_I]], i64 0 // CHECK: ret i16 [[TMP1]] int16_t test_vqmovuns_s32(int32_t a) { @@ -17365,7 +17365,7 @@ int16_t test_vqmovuns_s32(int32_t a) { } // CHECK-LABEL: @test_vqmovund_s64( -// CHECK: [[VQMOVUND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %a) #4 +// CHECK: [[VQMOVUND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %a) // CHECK: ret i32 [[VQMOVUND_S64_I]] int32_t test_vqmovund_s64(int64_t a) { return (int32_t)vqmovund_s64(a); @@ -17373,7 +17373,7 @@ int32_t test_vqmovund_s64(int64_t a) { // CHECK-LABEL: @test_vqmovnh_s16( // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0 -// CHECK: [[VQMOVNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[TMP0]]) #4 +// CHECK: [[VQMOVNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[TMP0]]) // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_S16_I]], i64 0 // CHECK: ret i8 [[TMP1]] int8_t test_vqmovnh_s16(int16_t a) { @@ -17382,7 +17382,7 @@ int8_t test_vqmovnh_s16(int16_t a) { // CHECK-LABEL: @test_vqmovns_s32( // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0 -// CHECK: [[VQMOVNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[TMP0]]) #4 +// CHECK: [[VQMOVNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[TMP0]]) // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_S32_I]], i64 0 // CHECK: ret i16 [[TMP1]] int16_t test_vqmovns_s32(int32_t a) { @@ -17390,7 +17390,7 @@ int16_t test_vqmovns_s32(int32_t a) { } // CHECK-LABEL: @test_vqmovnd_s64( -// CHECK: [[VQMOVND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %a) #4 +// CHECK: [[VQMOVND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %a) // CHECK: ret i32 [[VQMOVND_S64_I]] int32_t test_vqmovnd_s64(int64_t a) { return (int32_t)vqmovnd_s64(a); @@ -17398,7 +17398,7 @@ int32_t test_vqmovnd_s64(int64_t a) { // CHECK-LABEL: @test_vqmovnh_u16( // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0 -// CHECK: [[VQMOVNH_U16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[TMP0]]) #4 +// CHECK: [[VQMOVNH_U16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[TMP0]]) // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_U16_I]], i64 0 // CHECK: ret i8 [[TMP1]] int8_t test_vqmovnh_u16(int16_t a) { @@ -17407,7 +17407,7 @@ int8_t test_vqmovnh_u16(int16_t a) { // CHECK-LABEL: @test_vqmovns_u32( // CHECK: [[TMP0:%.*]] 
= insertelement <4 x i32> undef, i32 %a, i64 0 -// CHECK: [[VQMOVNS_U32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[TMP0]]) #4 +// CHECK: [[VQMOVNS_U32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[TMP0]]) // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_U32_I]], i64 0 // CHECK: ret i16 [[TMP1]] int16_t test_vqmovns_u32(int32_t a) { @@ -17415,7 +17415,7 @@ int16_t test_vqmovns_u32(int32_t a) { } // CHECK-LABEL: @test_vqmovnd_u64( -// CHECK: [[VQMOVND_U64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %a) #4 +// CHECK: [[VQMOVND_U64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %a) // CHECK: ret i32 [[VQMOVND_U64_I]] int32_t test_vqmovnd_u64(int64_t a) { return (int32_t)vqmovnd_u64(a); @@ -17582,56 +17582,56 @@ uint64_t test_vcltzd_f64(float64_t a) { } // CHECK-LABEL: @test_vcages_f32( -// CHECK: [[VCAGES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %a, float %b) #4 +// CHECK: [[VCAGES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %a, float %b) // CHECK: ret i32 [[VCAGES_F32_I]] uint32_t test_vcages_f32(float32_t a, float32_t b) { return (uint32_t)vcages_f32(a, b); } // CHECK-LABEL: @test_vcaged_f64( -// CHECK: [[VCAGED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %a, double %b) #4 +// CHECK: [[VCAGED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %a, double %b) // CHECK: ret i64 [[VCAGED_F64_I]] uint64_t test_vcaged_f64(float64_t a, float64_t b) { return (uint64_t)vcaged_f64(a, b); } // CHECK-LABEL: @test_vcagts_f32( -// CHECK: [[VCAGTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %a, float %b) #4 +// CHECK: [[VCAGTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %a, float %b) // CHECK: ret i32 [[VCAGTS_F32_I]] uint32_t test_vcagts_f32(float32_t a, float32_t b) { return (uint32_t)vcagts_f32(a, b); } // CHECK-LABEL: @test_vcagtd_f64( -// CHECK: [[VCAGTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %a, double %b) #4 +// CHECK: [[VCAGTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %a, double %b) // CHECK: ret i64 [[VCAGTD_F64_I]] uint64_t test_vcagtd_f64(float64_t a, float64_t b) { return (uint64_t)vcagtd_f64(a, b); } // CHECK-LABEL: @test_vcales_f32( -// CHECK: [[VCALES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %b, float %a) #4 +// CHECK: [[VCALES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %b, float %a) // CHECK: ret i32 [[VCALES_F32_I]] uint32_t test_vcales_f32(float32_t a, float32_t b) { return (uint32_t)vcales_f32(a, b); } // CHECK-LABEL: @test_vcaled_f64( -// CHECK: [[VCALED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %b, double %a) #4 +// CHECK: [[VCALED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %b, double %a) // CHECK: ret i64 [[VCALED_F64_I]] uint64_t test_vcaled_f64(float64_t a, float64_t b) { return (uint64_t)vcaled_f64(a, b); } // CHECK-LABEL: @test_vcalts_f32( -// CHECK: [[VCALTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %b, float %a) #4 +// CHECK: [[VCALTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %b, float %a) // CHECK: ret i32 [[VCALTS_F32_I]] uint32_t test_vcalts_f32(float32_t a, float32_t b) { return (uint32_t)vcalts_f32(a, b); } // CHECK-LABEL: @test_vcaltd_f64( -// CHECK: [[VCALTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %b, double %a) #4 +// CHECK: [[VCALTD_F64_I:%.*]] = call i64 
@llvm.aarch64.neon.facgt.i64.f64(double %b, double %a) // CHECK: ret i64 [[VCALTD_F64_I]] uint64_t test_vcaltd_f64(float64_t a, float64_t b) { return (uint64_t)vcaltd_f64(a, b); @@ -20865,14 +20865,14 @@ poly64x2_t test_vreinterpretq_p64_p16(poly16x8_t a) { } // CHECK-LABEL: @test_vabds_f32( -// CHECK: [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b) #4 +// CHECK: [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b) // CHECK: ret float [[VABDS_F32_I]] float32_t test_vabds_f32(float32_t a, float32_t b) { return vabds_f32(a, b); } // CHECK-LABEL: @test_vabdd_f64( -// CHECK: [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b) #4 +// CHECK: [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b) // CHECK: ret double [[VABDD_F64_I]] float64_t test_vabdd_f64(float64_t a, float64_t b) { return vabdd_f64(a, b); @@ -20881,7 +20881,7 @@ float64_t test_vabdd_f64(float64_t a, float64_t b) { // CHECK-LABEL: @test_vuqadd_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VUQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VUQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: ret <1 x i64> [[VUQADD2_I]] int64x1_t test_vuqadd_s64(int64x1_t a, uint64x1_t b) { return vuqadd_s64(a, b); @@ -20890,21 +20890,21 @@ int64x1_t test_vuqadd_s64(int64x1_t a, uint64x1_t b) { // CHECK-LABEL: @test_vsqadd_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VSQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VSQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: ret <1 x i64> [[VSQADD2_I]] uint64x1_t test_vsqadd_u64(uint64x1_t a, int64x1_t b) { return vsqadd_u64(a, b); } // CHECK-LABEL: @test_vsqadd_u8( -// CHECK: [[VSQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VSQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VSQADD_I]] uint8x8_t test_vsqadd_u8(uint8x8_t a, int8x8_t b) { return vsqadd_u8(a, b); } // CHECK-LABEL: @test_vsqaddq_u8( -// CHECK: [[VSQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VSQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VSQADD_I]] uint8x16_t test_vsqaddq_u8(uint8x16_t a, int8x16_t b) { return vsqaddq_u8(a, b); @@ -20913,7 +20913,7 @@ uint8x16_t test_vsqaddq_u8(uint8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vsqadd_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VSQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VSQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: ret <4 x i16> [[VSQADD2_I]] uint16x4_t test_vsqadd_u16(uint16x4_t a, int16x4_t b) { return vsqadd_u16(a, b); @@ -20922,7 +20922,7 @@ uint16x4_t test_vsqadd_u16(uint16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vsqaddq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x 
i16> %b to <16 x i8> -// CHECK: [[VSQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VSQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: ret <8 x i16> [[VSQADD2_I]] uint16x8_t test_vsqaddq_u16(uint16x8_t a, int16x8_t b) { return vsqaddq_u16(a, b); @@ -20931,7 +20931,7 @@ uint16x8_t test_vsqaddq_u16(uint16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vsqadd_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VSQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VSQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: ret <2 x i32> [[VSQADD2_I]] uint32x2_t test_vsqadd_u32(uint32x2_t a, int32x2_t b) { return vsqadd_u32(a, b); @@ -20940,7 +20940,7 @@ uint32x2_t test_vsqadd_u32(uint32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vsqaddq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VSQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VSQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: ret <4 x i32> [[VSQADD2_I]] uint32x4_t test_vsqaddq_u32(uint32x4_t a, int32x4_t b) { return vsqaddq_u32(a, b); @@ -20949,7 +20949,7 @@ uint32x4_t test_vsqaddq_u32(uint32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vsqaddq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VSQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VSQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: ret <2 x i64> [[VSQADD2_I]] uint64x2_t test_vsqaddq_u64(uint64x2_t a, int64x2_t b) { return vsqaddq_u64(a, b); @@ -20957,7 +20957,7 @@ uint64x2_t test_vsqaddq_u64(uint64x2_t a, int64x2_t b) { // CHECK-LABEL: @test_vabs_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> -// CHECK: [[VABS1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> %a) #4 +// CHECK: [[VABS1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> %a) // CHECK: ret <1 x i64> [[VABS1_I]] int64x1_t test_vabs_s64(int64x1_t a) { return vabs_s64(a); @@ -20965,7 +20965,7 @@ int64x1_t test_vabs_s64(int64x1_t a) { // CHECK-LABEL: @test_vqabs_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> -// CHECK: [[VQABS_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqabs.v1i64(<1 x i64> %a) #4 +// CHECK: [[VQABS_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqabs.v1i64(<1 x i64> %a) // CHECK: [[VQABS_V2_I:%.*]] = bitcast <1 x i64> [[VQABS_V1_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQABS_V1_I]] int64x1_t test_vqabs_s64(int64x1_t a) { @@ -20974,7 +20974,7 @@ int64x1_t test_vqabs_s64(int64x1_t a) { // CHECK-LABEL: @test_vqneg_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> -// CHECK: [[VQNEG_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqneg.v1i64(<1 x i64> %a) #4 +// CHECK: [[VQNEG_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqneg.v1i64(<1 x i64> %a) // CHECK: [[VQNEG_V2_I:%.*]] = bitcast <1 x i64> [[VQNEG_V1_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQNEG_V1_I]] int64x1_t test_vqneg_s64(int64x1_t a) { @@ -20990,7 +20990,7 @@ int64x1_t test_vneg_s64(int64x1_t 
a) { // CHECK-LABEL: @test_vaddv_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a) #4 +// CHECK: [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a) // CHECK: ret float [[VADDV_F32_I]] float32_t test_vaddv_f32(float32x2_t a) { return vaddv_f32(a); @@ -20998,7 +20998,7 @@ float32_t test_vaddv_f32(float32x2_t a) { // CHECK-LABEL: @test_vaddvq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a) #4 +// CHECK: [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a) // CHECK: ret float [[VADDVQ_F32_I]] float32_t test_vaddvq_f32(float32x4_t a) { return vaddvq_f32(a); @@ -21006,7 +21006,7 @@ float32_t test_vaddvq_f32(float32x4_t a) { // CHECK-LABEL: @test_vaddvq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a) #4 +// CHECK: [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a) // CHECK: ret double [[VADDVQ_F64_I]] float64_t test_vaddvq_f64(float64x2_t a) { return vaddvq_f64(a); @@ -21014,7 +21014,7 @@ float64_t test_vaddvq_f64(float64x2_t a) { // CHECK-LABEL: @test_vmaxv_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) #4 +// CHECK: [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) // CHECK: ret float [[VMAXV_F32_I]] float32_t test_vmaxv_f32(float32x2_t a) { return vmaxv_f32(a); @@ -21022,7 +21022,7 @@ float32_t test_vmaxv_f32(float32x2_t a) { // CHECK-LABEL: @test_vmaxvq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a) #4 +// CHECK: [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a) // CHECK: ret double [[VMAXVQ_F64_I]] float64_t test_vmaxvq_f64(float64x2_t a) { return vmaxvq_f64(a); @@ -21030,7 +21030,7 @@ float64_t test_vmaxvq_f64(float64x2_t a) { // CHECK-LABEL: @test_vminv_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a) #4 +// CHECK: [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a) // CHECK: ret float [[VMINV_F32_I]] float32_t test_vminv_f32(float32x2_t a) { return vminv_f32(a); @@ -21038,7 +21038,7 @@ float32_t test_vminv_f32(float32x2_t a) { // CHECK-LABEL: @test_vminvq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a) #4 +// CHECK: [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a) // CHECK: ret double [[VMINVQ_F64_I]] float64_t test_vminvq_f64(float64x2_t a) { return vminvq_f64(a); @@ -21046,7 +21046,7 @@ float64_t test_vminvq_f64(float64x2_t a) { // CHECK-LABEL: @test_vmaxnmvq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a) #4 +// CHECK: [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a) // CHECK: ret double 
[[VMAXNMVQ_F64_I]] float64_t test_vmaxnmvq_f64(float64x2_t a) { return vmaxnmvq_f64(a); @@ -21054,7 +21054,7 @@ float64_t test_vmaxnmvq_f64(float64x2_t a) { // CHECK-LABEL: @test_vmaxnmv_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a) #4 +// CHECK: [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a) // CHECK: ret float [[VMAXNMV_F32_I]] float32_t test_vmaxnmv_f32(float32x2_t a) { return vmaxnmv_f32(a); @@ -21062,7 +21062,7 @@ float32_t test_vmaxnmv_f32(float32x2_t a) { // CHECK-LABEL: @test_vminnmvq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a) #4 +// CHECK: [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a) // CHECK: ret double [[VMINNMVQ_F64_I]] float64_t test_vminnmvq_f64(float64x2_t a) { return vminnmvq_f64(a); @@ -21070,7 +21070,7 @@ float64_t test_vminnmvq_f64(float64x2_t a) { // CHECK-LABEL: @test_vminnmv_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a) #4 +// CHECK: [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a) // CHECK: ret float [[VMINNMV_F32_I]] float32_t test_vminnmv_f32(float32x2_t a) { return vminnmv_f32(a); @@ -21079,7 +21079,7 @@ float32_t test_vminnmv_f32(float32x2_t a) { // CHECK-LABEL: @test_vpaddq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VPADDQ_V2_I]] int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) { @@ -21089,7 +21089,7 @@ int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) { // CHECK-LABEL: @test_vpaddq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VPADDQ_V2_I]] uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) { @@ -21098,7 +21098,7 @@ uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) { // CHECK-LABEL: @test_vpaddd_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a) #4 +// CHECK: [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a) // CHECK: ret i64 [[VPADDD_U64_I]] uint64_t test_vpaddd_u64(uint64x2_t a) { return vpaddd_u64(a); @@ -21106,7 +21106,7 @@ uint64_t test_vpaddd_u64(uint64x2_t a) { // CHECK-LABEL: @test_vaddvq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> %a) #4 +// CHECK: [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> %a) // CHECK: 
ret i64 [[VADDVQ_S64_I]] int64_t test_vaddvq_s64(int64x2_t a) { return vaddvq_s64(a); @@ -21114,7 +21114,7 @@ int64_t test_vaddvq_s64(int64x2_t a) { // CHECK-LABEL: @test_vaddvq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a) #4 +// CHECK: [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a) // CHECK: ret i64 [[VADDVQ_U64_I]] uint64_t test_vaddvq_u64(uint64x2_t a) { return vaddvq_u64(a); @@ -21161,7 +21161,7 @@ float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) { // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8> -// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a) #4 +// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a) // CHECK: ret <1 x double> [[TMP3]] float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) { return vfma_f64(a, b, c); @@ -21172,7 +21172,7 @@ float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) { // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8> -// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> %c, <1 x double> %a) #4 +// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> %c, <1 x double> %a) // CHECK: ret <1 x double> [[TMP3]] float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) { return vfms_f64(a, b, c); @@ -21188,7 +21188,7 @@ float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) { // CHECK-LABEL: @test_vabd_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> -// CHECK: [[VABD2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> %a, <1 x double> %b) #4 +// CHECK: [[VABD2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> %a, <1 x double> %b) // CHECK: ret <1 x double> [[VABD2_I]] float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) { return vabd_f64(a, b); @@ -21197,7 +21197,7 @@ float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) { // CHECK-LABEL: @test_vmax_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> -// CHECK: [[VMAX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> %a, <1 x double> %b) #4 +// CHECK: [[VMAX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> %a, <1 x double> %b) // CHECK: ret <1 x double> [[VMAX2_I]] float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) { return vmax_f64(a, b); @@ -21206,7 +21206,7 @@ float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) { // CHECK-LABEL: @test_vmin_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> -// CHECK: [[VMIN2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> %a, <1 x double> %b) #4 +// CHECK: [[VMIN2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> %a, <1 x double> %b) // CHECK: ret <1 x double> [[VMIN2_I]] float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) { return 
vmin_f64(a, b); @@ -21215,7 +21215,7 @@ float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) { // CHECK-LABEL: @test_vmaxnm_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> -// CHECK: [[VMAXNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> %a, <1 x double> %b) #4 +// CHECK: [[VMAXNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> %a, <1 x double> %b) // CHECK: ret <1 x double> [[VMAXNM2_I]] float64x1_t test_vmaxnm_f64(float64x1_t a, float64x1_t b) { return vmaxnm_f64(a, b); @@ -21224,7 +21224,7 @@ float64x1_t test_vmaxnm_f64(float64x1_t a, float64x1_t b) { // CHECK-LABEL: @test_vminnm_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> -// CHECK: [[VMINNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> %a, <1 x double> %b) #4 +// CHECK: [[VMINNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> %a, <1 x double> %b) // CHECK: ret <1 x double> [[VMINNM2_I]] float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) { return vminnm_f64(a, b); @@ -21232,7 +21232,7 @@ float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) { // CHECK-LABEL: @test_vabs_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[VABS1_I:%.*]] = call <1 x double> @llvm.fabs.v1f64(<1 x double> %a) #4 +// CHECK: [[VABS1_I:%.*]] = call <1 x double> @llvm.fabs.v1f64(<1 x double> %a) // CHECK: ret <1 x double> [[VABS1_I]] float64x1_t test_vabs_f64(float64x1_t a) { return vabs_f64(a); @@ -21263,7 +21263,7 @@ uint64x1_t test_vcvt_u64_f64(float64x1_t a) { // CHECK-LABEL: @test_vcvtn_s64_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %a) #4 +// CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %a) // CHECK: ret <1 x i64> [[VCVTN1_I]] int64x1_t test_vcvtn_s64_f64(float64x1_t a) { return vcvtn_s64_f64(a); @@ -21271,7 +21271,7 @@ int64x1_t test_vcvtn_s64_f64(float64x1_t a) { // CHECK-LABEL: @test_vcvtn_u64_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %a) #4 +// CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %a) // CHECK: ret <1 x i64> [[VCVTN1_I]] uint64x1_t test_vcvtn_u64_f64(float64x1_t a) { return vcvtn_u64_f64(a); @@ -21279,7 +21279,7 @@ uint64x1_t test_vcvtn_u64_f64(float64x1_t a) { // CHECK-LABEL: @test_vcvtp_s64_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %a) #4 +// CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %a) // CHECK: ret <1 x i64> [[VCVTP1_I]] int64x1_t test_vcvtp_s64_f64(float64x1_t a) { return vcvtp_s64_f64(a); @@ -21287,7 +21287,7 @@ int64x1_t test_vcvtp_s64_f64(float64x1_t a) { // CHECK-LABEL: @test_vcvtp_u64_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %a) #4 +// CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %a) // CHECK: ret <1 x i64> [[VCVTP1_I]] uint64x1_t test_vcvtp_u64_f64(float64x1_t a) { 
return vcvtp_u64_f64(a); @@ -21295,7 +21295,7 @@ uint64x1_t test_vcvtp_u64_f64(float64x1_t a) { // CHECK-LABEL: @test_vcvtm_s64_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %a) #4 +// CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %a) // CHECK: ret <1 x i64> [[VCVTM1_I]] int64x1_t test_vcvtm_s64_f64(float64x1_t a) { return vcvtm_s64_f64(a); @@ -21303,7 +21303,7 @@ int64x1_t test_vcvtm_s64_f64(float64x1_t a) { // CHECK-LABEL: @test_vcvtm_u64_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %a) #4 +// CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %a) // CHECK: ret <1 x i64> [[VCVTM1_I]] uint64x1_t test_vcvtm_u64_f64(float64x1_t a) { return vcvtm_u64_f64(a); @@ -21311,7 +21311,7 @@ uint64x1_t test_vcvtm_u64_f64(float64x1_t a) { // CHECK-LABEL: @test_vcvta_s64_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %a) #4 +// CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %a) // CHECK: ret <1 x i64> [[VCVTA1_I]] int64x1_t test_vcvta_s64_f64(float64x1_t a) { return vcvta_s64_f64(a); @@ -21319,7 +21319,7 @@ int64x1_t test_vcvta_s64_f64(float64x1_t a) { // CHECK-LABEL: @test_vcvta_u64_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %a) #4 +// CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %a) // CHECK: ret <1 x i64> [[VCVTA1_I]] uint64x1_t test_vcvta_u64_f64(float64x1_t a) { return vcvta_u64_f64(a); @@ -21379,7 +21379,7 @@ float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) { // CHECK-LABEL: @test_vrndn_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> %a) #4 +// CHECK: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> %a) // CHECK: ret <1 x double> [[VRNDN1_I]] float64x1_t test_vrndn_f64(float64x1_t a) { return vrndn_f64(a); @@ -21387,7 +21387,7 @@ float64x1_t test_vrndn_f64(float64x1_t a) { // CHECK-LABEL: @test_vrnda_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> %a) #4 +// CHECK: [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> %a) // CHECK: ret <1 x double> [[VRNDA1_I]] float64x1_t test_vrnda_f64(float64x1_t a) { return vrnda_f64(a); @@ -21395,7 +21395,7 @@ float64x1_t test_vrnda_f64(float64x1_t a) { // CHECK-LABEL: @test_vrndp_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> %a) #4 +// CHECK: [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> %a) // CHECK: ret <1 x double> [[VRNDP1_I]] float64x1_t test_vrndp_f64(float64x1_t a) { return vrndp_f64(a); @@ -21403,7 +21403,7 @@ float64x1_t test_vrndp_f64(float64x1_t a) { // CHECK-LABEL: @test_vrndm_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> %a) #4 +// CHECK: 
[[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> %a) // CHECK: ret <1 x double> [[VRNDM1_I]] float64x1_t test_vrndm_f64(float64x1_t a) { return vrndm_f64(a); @@ -21411,7 +21411,7 @@ float64x1_t test_vrndm_f64(float64x1_t a) { // CHECK-LABEL: @test_vrndx_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> %a) #4 +// CHECK: [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> %a) // CHECK: ret <1 x double> [[VRNDX1_I]] float64x1_t test_vrndx_f64(float64x1_t a) { return vrndx_f64(a); @@ -21419,7 +21419,7 @@ float64x1_t test_vrndx_f64(float64x1_t a) { // CHECK-LABEL: @test_vrnd_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> %a) #4 +// CHECK: [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> %a) // CHECK: ret <1 x double> [[VRNDZ1_I]] float64x1_t test_vrnd_f64(float64x1_t a) { return vrnd_f64(a); @@ -21427,7 +21427,7 @@ float64x1_t test_vrnd_f64(float64x1_t a) { // CHECK-LABEL: @test_vrndi_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[VRNDI1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a) #4 +// CHECK: [[VRNDI1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a) // CHECK: ret <1 x double> [[VRNDI1_I]] float64x1_t test_vrndi_f64(float64x1_t a) { return vrndi_f64(a); @@ -21435,7 +21435,7 @@ float64x1_t test_vrndi_f64(float64x1_t a) { // CHECK-LABEL: @test_vrsqrte_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[VRSQRTE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrte.v1f64(<1 x double> %a) #4 +// CHECK: [[VRSQRTE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrte.v1f64(<1 x double> %a) // CHECK: ret <1 x double> [[VRSQRTE_V1_I]] float64x1_t test_vrsqrte_f64(float64x1_t a) { return vrsqrte_f64(a); @@ -21443,7 +21443,7 @@ float64x1_t test_vrsqrte_f64(float64x1_t a) { // CHECK-LABEL: @test_vrecpe_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[VRECPE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecpe.v1f64(<1 x double> %a) #4 +// CHECK: [[VRECPE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecpe.v1f64(<1 x double> %a) // CHECK: ret <1 x double> [[VRECPE_V1_I]] float64x1_t test_vrecpe_f64(float64x1_t a) { return vrecpe_f64(a); @@ -21451,7 +21451,7 @@ float64x1_t test_vrecpe_f64(float64x1_t a) { // CHECK-LABEL: @test_vsqrt_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a) #4 +// CHECK: [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a) // CHECK: ret <1 x double> [[VSQRT_I]] float64x1_t test_vsqrt_f64(float64x1_t a) { return vsqrt_f64(a); @@ -21460,7 +21460,7 @@ float64x1_t test_vsqrt_f64(float64x1_t a) { // CHECK-LABEL: @test_vrecps_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> -// CHECK: [[VRECPS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecps.v1f64(<1 x double> %a, <1 x double> %b) #4 +// CHECK: [[VRECPS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecps.v1f64(<1 x double> %a, <1 x double> %b) // CHECK: ret <1 x double> [[VRECPS_V2_I]] float64x1_t test_vrecps_f64(float64x1_t a, float64x1_t b) { return vrecps_f64(a, b); @@ -21469,7 +21469,7 @@ float64x1_t test_vrecps_f64(float64x1_t a, float64x1_t b) { 
// CHECK-LABEL: @test_vrsqrts_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> -// CHECK: [[VRSQRTS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrts.v1f64(<1 x double> %a, <1 x double> %b) #4 +// CHECK: [[VRSQRTS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrts.v1f64(<1 x double> %a, <1 x double> %b) // CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <1 x double> [[VRSQRTS_V2_I]] to <8 x i8> // CHECK: ret <1 x double> [[VRSQRTS_V2_I]] float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) { @@ -21478,7 +21478,7 @@ float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) { // CHECK-LABEL: @test_vminv_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a) #4 +// CHECK: [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a) // CHECK: ret i32 [[VMINV_S32_I]] int32_t test_vminv_s32(int32x2_t a) { return vminv_s32(a); @@ -21486,7 +21486,7 @@ int32_t test_vminv_s32(int32x2_t a) { // CHECK-LABEL: @test_vminv_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> %a) #4 +// CHECK: [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> %a) // CHECK: ret i32 [[VMINV_U32_I]] uint32_t test_vminv_u32(uint32x2_t a) { return vminv_u32(a); @@ -21494,7 +21494,7 @@ uint32_t test_vminv_u32(uint32x2_t a) { // CHECK-LABEL: @test_vmaxv_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a) #4 +// CHECK: [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a) // CHECK: ret i32 [[VMAXV_S32_I]] int32_t test_vmaxv_s32(int32x2_t a) { return vmaxv_s32(a); @@ -21502,7 +21502,7 @@ int32_t test_vmaxv_s32(int32x2_t a) { // CHECK-LABEL: @test_vmaxv_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> %a) #4 +// CHECK: [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> %a) // CHECK: ret i32 [[VMAXV_U32_I]] uint32_t test_vmaxv_u32(uint32x2_t a) { return vmaxv_u32(a); @@ -21510,7 +21510,7 @@ uint32_t test_vmaxv_u32(uint32x2_t a) { // CHECK-LABEL: @test_vaddv_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a) #4 +// CHECK: [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a) // CHECK: ret i32 [[VADDV_S32_I]] int32_t test_vaddv_s32(int32x2_t a) { return vaddv_s32(a); @@ -21518,7 +21518,7 @@ int32_t test_vaddv_s32(int32x2_t a) { // CHECK-LABEL: @test_vaddv_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a) #4 +// CHECK: [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a) // CHECK: ret i32 [[VADDV_U32_I]] uint32_t test_vaddv_u32(uint32x2_t a) { return vaddv_u32(a); @@ -21526,7 +21526,7 @@ uint32_t test_vaddv_u32(uint32x2_t a) { // CHECK-LABEL: @test_vaddlv_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> %a) #4 +// CHECK: [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> %a) // 
CHECK: ret i64 [[VADDLV_S32_I]] int64_t test_vaddlv_s32(int32x2_t a) { return vaddlv_s32(a); @@ -21534,7 +21534,7 @@ int64_t test_vaddlv_s32(int32x2_t a) { // CHECK-LABEL: @test_vaddlv_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> %a) #4 +// CHECK: [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> %a) // CHECK: ret i64 [[VADDLV_U32_I]] uint64_t test_vaddlv_u32(uint32x2_t a) { return vaddlv_u32(a); diff --git a/test/CodeGen/aarch64-neon-misc.c b/test/CodeGen/aarch64-neon-misc.c index 1342bbb0c8cbe..28fc1fe1e3f89 100644 --- a/test/CodeGen/aarch64-neon-misc.c +++ b/test/CodeGen/aarch64-neon-misc.c @@ -911,7 +911,7 @@ float32x4_t test_vrev64q_f32(float32x4_t a) { } // CHECK-LABEL: @test_vpaddl_s8( -// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %a) #2 +// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %a) // CHECK: ret <4 x i16> [[VPADDL_I]] int16x4_t test_vpaddl_s8(int8x8_t a) { return vpaddl_s8(a); @@ -919,7 +919,7 @@ int16x4_t test_vpaddl_s8(int8x8_t a) { // CHECK-LABEL: @test_vpaddl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %a) #2 +// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %a) // CHECK: ret <2 x i32> [[VPADDL1_I]] int32x2_t test_vpaddl_s16(int16x4_t a) { return vpaddl_s16(a); @@ -927,14 +927,14 @@ int32x2_t test_vpaddl_s16(int16x4_t a) { // CHECK-LABEL: @test_vpaddl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> %a) #2 +// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> %a) // CHECK: ret <1 x i64> [[VPADDL1_I]] int64x1_t test_vpaddl_s32(int32x2_t a) { return vpaddl_s32(a); } // CHECK-LABEL: @test_vpaddl_u8( -// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %a) #2 +// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %a) // CHECK: ret <4 x i16> [[VPADDL_I]] uint16x4_t test_vpaddl_u8(uint8x8_t a) { return vpaddl_u8(a); @@ -942,7 +942,7 @@ uint16x4_t test_vpaddl_u8(uint8x8_t a) { // CHECK-LABEL: @test_vpaddl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %a) #2 +// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %a) // CHECK: ret <2 x i32> [[VPADDL1_I]] uint32x2_t test_vpaddl_u16(uint16x4_t a) { return vpaddl_u16(a); @@ -950,14 +950,14 @@ uint32x2_t test_vpaddl_u16(uint16x4_t a) { // CHECK-LABEL: @test_vpaddl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> %a) #2 +// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> %a) // CHECK: ret <1 x i64> [[VPADDL1_I]] uint64x1_t test_vpaddl_u32(uint32x2_t a) { return vpaddl_u32(a); } // CHECK-LABEL: @test_vpaddlq_s8( -// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %a) #2 +// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %a) // CHECK: ret <8 x i16> [[VPADDL_I]] int16x8_t 
test_vpaddlq_s8(int8x16_t a) { return vpaddlq_s8(a); @@ -965,7 +965,7 @@ int16x8_t test_vpaddlq_s8(int8x16_t a) { // CHECK-LABEL: @test_vpaddlq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %a) #2 +// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %a) // CHECK: ret <4 x i32> [[VPADDL1_I]] int32x4_t test_vpaddlq_s16(int16x8_t a) { return vpaddlq_s16(a); @@ -973,14 +973,14 @@ int32x4_t test_vpaddlq_s16(int16x8_t a) { // CHECK-LABEL: @test_vpaddlq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %a) #2 +// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %a) // CHECK: ret <2 x i64> [[VPADDL1_I]] int64x2_t test_vpaddlq_s32(int32x4_t a) { return vpaddlq_s32(a); } // CHECK-LABEL: @test_vpaddlq_u8( -// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %a) #2 +// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %a) // CHECK: ret <8 x i16> [[VPADDL_I]] uint16x8_t test_vpaddlq_u8(uint8x16_t a) { return vpaddlq_u8(a); @@ -988,7 +988,7 @@ uint16x8_t test_vpaddlq_u8(uint8x16_t a) { // CHECK-LABEL: @test_vpaddlq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %a) #2 +// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %a) // CHECK: ret <4 x i32> [[VPADDL1_I]] uint32x4_t test_vpaddlq_u16(uint16x8_t a) { return vpaddlq_u16(a); @@ -996,7 +996,7 @@ uint32x4_t test_vpaddlq_u16(uint16x8_t a) { // CHECK-LABEL: @test_vpaddlq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %a) #2 +// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %a) // CHECK: ret <2 x i64> [[VPADDL1_I]] uint64x2_t test_vpaddlq_u32(uint32x4_t a) { return vpaddlq_u32(a); @@ -1004,7 +1004,7 @@ uint64x2_t test_vpaddlq_u32(uint32x4_t a) { // CHECK-LABEL: @test_vpadal_s8( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VPADAL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %b) #2 +// CHECK: [[VPADAL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %b) // CHECK: [[TMP1:%.*]] = add <4 x i16> [[VPADAL_I]], %a // CHECK: ret <4 x i16> [[TMP1]] int16x4_t test_vpadal_s8(int16x4_t a, int8x8_t b) { @@ -1014,7 +1014,7 @@ int16x4_t test_vpadal_s8(int16x4_t a, int8x8_t b) { // CHECK-LABEL: @test_vpadal_s16( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %b) #2 +// CHECK: [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %b) // CHECK: [[TMP2:%.*]] = add <2 x i32> [[VPADAL1_I]], %a // CHECK: ret <2 x i32> [[TMP2]] int32x2_t test_vpadal_s16(int32x2_t a, int16x4_t b) { @@ -1024,7 +1024,7 @@ int32x2_t test_vpadal_s16(int32x2_t a, int16x4_t b) { // CHECK-LABEL: @test_vpadal_s32( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPADAL1_I:%.*]] = call <1 x 
i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> %b) #2 +// CHECK: [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> %b) // CHECK: [[TMP2:%.*]] = add <1 x i64> [[VPADAL1_I]], %a // CHECK: ret <1 x i64> [[TMP2]] int64x1_t test_vpadal_s32(int64x1_t a, int32x2_t b) { @@ -1033,7 +1033,7 @@ int64x1_t test_vpadal_s32(int64x1_t a, int32x2_t b) { // CHECK-LABEL: @test_vpadal_u8( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VPADAL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %b) #2 +// CHECK: [[VPADAL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %b) // CHECK: [[TMP1:%.*]] = add <4 x i16> [[VPADAL_I]], %a // CHECK: ret <4 x i16> [[TMP1]] uint16x4_t test_vpadal_u8(uint16x4_t a, uint8x8_t b) { @@ -1043,7 +1043,7 @@ uint16x4_t test_vpadal_u8(uint16x4_t a, uint8x8_t b) { // CHECK-LABEL: @test_vpadal_u16( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %b) #2 +// CHECK: [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %b) // CHECK: [[TMP2:%.*]] = add <2 x i32> [[VPADAL1_I]], %a // CHECK: ret <2 x i32> [[TMP2]] uint32x2_t test_vpadal_u16(uint32x2_t a, uint16x4_t b) { @@ -1053,7 +1053,7 @@ uint32x2_t test_vpadal_u16(uint32x2_t a, uint16x4_t b) { // CHECK-LABEL: @test_vpadal_u32( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> %b) #2 +// CHECK: [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> %b) // CHECK: [[TMP2:%.*]] = add <1 x i64> [[VPADAL1_I]], %a // CHECK: ret <1 x i64> [[TMP2]] uint64x1_t test_vpadal_u32(uint64x1_t a, uint32x2_t b) { @@ -1062,7 +1062,7 @@ uint64x1_t test_vpadal_u32(uint64x1_t a, uint32x2_t b) { // CHECK-LABEL: @test_vpadalq_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VPADAL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %b) #2 +// CHECK: [[VPADAL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %b) // CHECK: [[TMP1:%.*]] = add <8 x i16> [[VPADAL_I]], %a // CHECK: ret <8 x i16> [[TMP1]] int16x8_t test_vpadalq_s8(int16x8_t a, int8x16_t b) { @@ -1072,7 +1072,7 @@ int16x8_t test_vpadalq_s8(int16x8_t a, int8x16_t b) { // CHECK-LABEL: @test_vpadalq_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %b) #2 +// CHECK: [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %b) // CHECK: [[TMP2:%.*]] = add <4 x i32> [[VPADAL1_I]], %a // CHECK: ret <4 x i32> [[TMP2]] int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) { @@ -1082,7 +1082,7 @@ int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) { // CHECK-LABEL: @test_vpadalq_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %b) #2 +// CHECK: [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %b) // CHECK: [[TMP2:%.*]] = add <2 x i64> [[VPADAL1_I]], %a // CHECK: ret <2 x i64> 
[[TMP2]] int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) { @@ -1091,7 +1091,7 @@ int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) { // CHECK-LABEL: @test_vpadalq_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VPADAL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %b) #2 +// CHECK: [[VPADAL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %b) // CHECK: [[TMP1:%.*]] = add <8 x i16> [[VPADAL_I]], %a // CHECK: ret <8 x i16> [[TMP1]] uint16x8_t test_vpadalq_u8(uint16x8_t a, uint8x16_t b) { @@ -1101,7 +1101,7 @@ uint16x8_t test_vpadalq_u8(uint16x8_t a, uint8x16_t b) { // CHECK-LABEL: @test_vpadalq_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %b) #2 +// CHECK: [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %b) // CHECK: [[TMP2:%.*]] = add <4 x i32> [[VPADAL1_I]], %a // CHECK: ret <4 x i32> [[TMP2]] uint32x4_t test_vpadalq_u16(uint32x4_t a, uint16x8_t b) { @@ -1111,7 +1111,7 @@ uint32x4_t test_vpadalq_u16(uint32x4_t a, uint16x8_t b) { // CHECK-LABEL: @test_vpadalq_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %b) #2 +// CHECK: [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %b) // CHECK: [[TMP2:%.*]] = add <2 x i64> [[VPADAL1_I]], %a // CHECK: ret <2 x i64> [[TMP2]] uint64x2_t test_vpadalq_u32(uint64x2_t a, uint32x4_t b) { @@ -1119,14 +1119,14 @@ uint64x2_t test_vpadalq_u32(uint64x2_t a, uint32x4_t b) { } // CHECK-LABEL: @test_vqabs_s8( -// CHECK: [[VQABS_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> %a) #2 +// CHECK: [[VQABS_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> %a) // CHECK: ret <8 x i8> [[VQABS_V_I]] int8x8_t test_vqabs_s8(int8x8_t a) { return vqabs_s8(a); } // CHECK-LABEL: @test_vqabsq_s8( -// CHECK: [[VQABSQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8> %a) #2 +// CHECK: [[VQABSQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8> %a) // CHECK: ret <16 x i8> [[VQABSQ_V_I]] int8x16_t test_vqabsq_s8(int8x16_t a) { return vqabsq_s8(a); @@ -1134,7 +1134,7 @@ int8x16_t test_vqabsq_s8(int8x16_t a) { // CHECK-LABEL: @test_vqabs_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> %a) #2 +// CHECK: [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> %a) // CHECK: [[VQABS_V2_I:%.*]] = bitcast <4 x i16> [[VQABS_V1_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQABS_V1_I]] int16x4_t test_vqabs_s16(int16x4_t a) { @@ -1143,7 +1143,7 @@ int16x4_t test_vqabs_s16(int16x4_t a) { // CHECK-LABEL: @test_vqabsq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16> %a) #2 +// CHECK: [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16> %a) // CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <8 x i16> [[VQABSQ_V1_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQABSQ_V1_I]] int16x8_t test_vqabsq_s16(int16x8_t a) { @@ -1152,7 +1152,7 @@ int16x8_t test_vqabsq_s16(int16x8_t a) { // CHECK-LABEL: @test_vqabs_s32( // CHECK: 
[[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32> %a) #2 +// CHECK: [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32> %a) // CHECK: [[VQABS_V2_I:%.*]] = bitcast <2 x i32> [[VQABS_V1_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQABS_V1_I]] int32x2_t test_vqabs_s32(int32x2_t a) { @@ -1161,7 +1161,7 @@ int32x2_t test_vqabs_s32(int32x2_t a) { // CHECK-LABEL: @test_vqabsq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32> %a) #2 +// CHECK: [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32> %a) // CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <4 x i32> [[VQABSQ_V1_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQABSQ_V1_I]] int32x4_t test_vqabsq_s32(int32x4_t a) { @@ -1170,7 +1170,7 @@ int32x4_t test_vqabsq_s32(int32x4_t a) { // CHECK-LABEL: @test_vqabsq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[VQABSQ_V1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqabs.v2i64(<2 x i64> %a) #2 +// CHECK: [[VQABSQ_V1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqabs.v2i64(<2 x i64> %a) // CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <2 x i64> [[VQABSQ_V1_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQABSQ_V1_I]] int64x2_t test_vqabsq_s64(int64x2_t a) { @@ -1178,14 +1178,14 @@ int64x2_t test_vqabsq_s64(int64x2_t a) { } // CHECK-LABEL: @test_vqneg_s8( -// CHECK: [[VQNEG_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> %a) #2 +// CHECK: [[VQNEG_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> %a) // CHECK: ret <8 x i8> [[VQNEG_V_I]] int8x8_t test_vqneg_s8(int8x8_t a) { return vqneg_s8(a); } // CHECK-LABEL: @test_vqnegq_s8( -// CHECK: [[VQNEGQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8> %a) #2 +// CHECK: [[VQNEGQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8> %a) // CHECK: ret <16 x i8> [[VQNEGQ_V_I]] int8x16_t test_vqnegq_s8(int8x16_t a) { return vqnegq_s8(a); @@ -1193,7 +1193,7 @@ int8x16_t test_vqnegq_s8(int8x16_t a) { // CHECK-LABEL: @test_vqneg_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> %a) #2 +// CHECK: [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> %a) // CHECK: [[VQNEG_V2_I:%.*]] = bitcast <4 x i16> [[VQNEG_V1_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQNEG_V1_I]] int16x4_t test_vqneg_s16(int16x4_t a) { @@ -1202,7 +1202,7 @@ int16x4_t test_vqneg_s16(int16x4_t a) { // CHECK-LABEL: @test_vqnegq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> %a) #2 +// CHECK: [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> %a) // CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <8 x i16> [[VQNEGQ_V1_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQNEGQ_V1_I]] int16x8_t test_vqnegq_s16(int16x8_t a) { @@ -1211,7 +1211,7 @@ int16x8_t test_vqnegq_s16(int16x8_t a) { // CHECK-LABEL: @test_vqneg_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32> %a) #2 +// CHECK: [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32> %a) // CHECK: [[VQNEG_V2_I:%.*]] = bitcast <2 x i32> [[VQNEG_V1_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQNEG_V1_I]] 
int32x2_t test_vqneg_s32(int32x2_t a) { @@ -1220,7 +1220,7 @@ int32x2_t test_vqneg_s32(int32x2_t a) { // CHECK-LABEL: @test_vqnegq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32> %a) #2 +// CHECK: [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32> %a) // CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <4 x i32> [[VQNEGQ_V1_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQNEGQ_V1_I]] int32x4_t test_vqnegq_s32(int32x4_t a) { @@ -1229,7 +1229,7 @@ int32x4_t test_vqnegq_s32(int32x4_t a) { // CHECK-LABEL: @test_vqnegq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[VQNEGQ_V1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqneg.v2i64(<2 x i64> %a) #2 +// CHECK: [[VQNEGQ_V1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqneg.v2i64(<2 x i64> %a) // CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <2 x i64> [[VQNEGQ_V1_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQNEGQ_V1_I]] int64x2_t test_vqnegq_s64(int64x2_t a) { @@ -1307,14 +1307,14 @@ float64x2_t test_vnegq_f64(float64x2_t a) { } // CHECK-LABEL: @test_vabs_s8( -// CHECK: [[VABS_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8> %a) #2 +// CHECK: [[VABS_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8> %a) // CHECK: ret <8 x i8> [[VABS_I]] int8x8_t test_vabs_s8(int8x8_t a) { return vabs_s8(a); } // CHECK-LABEL: @test_vabsq_s8( -// CHECK: [[VABS_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8> %a) #2 +// CHECK: [[VABS_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8> %a) // CHECK: ret <16 x i8> [[VABS_I]] int8x16_t test_vabsq_s8(int8x16_t a) { return vabsq_s8(a); @@ -1322,7 +1322,7 @@ int8x16_t test_vabsq_s8(int8x16_t a) { // CHECK-LABEL: @test_vabs_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VABS1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16> %a) #2 +// CHECK: [[VABS1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16> %a) // CHECK: ret <4 x i16> [[VABS1_I]] int16x4_t test_vabs_s16(int16x4_t a) { return vabs_s16(a); @@ -1330,7 +1330,7 @@ int16x4_t test_vabs_s16(int16x4_t a) { // CHECK-LABEL: @test_vabsq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VABS1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16> %a) #2 +// CHECK: [[VABS1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16> %a) // CHECK: ret <8 x i16> [[VABS1_I]] int16x8_t test_vabsq_s16(int16x8_t a) { return vabsq_s16(a); @@ -1338,7 +1338,7 @@ int16x8_t test_vabsq_s16(int16x8_t a) { // CHECK-LABEL: @test_vabs_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VABS1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32> %a) #2 +// CHECK: [[VABS1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32> %a) // CHECK: ret <2 x i32> [[VABS1_I]] int32x2_t test_vabs_s32(int32x2_t a) { return vabs_s32(a); @@ -1346,7 +1346,7 @@ int32x2_t test_vabs_s32(int32x2_t a) { // CHECK-LABEL: @test_vabsq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VABS1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32> %a) #2 +// CHECK: [[VABS1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32> %a) // CHECK: ret <4 x i32> [[VABS1_I]] int32x4_t test_vabsq_s32(int32x4_t a) { return vabsq_s32(a); @@ -1354,7 +1354,7 @@ int32x4_t test_vabsq_s32(int32x4_t a) { // CHECK-LABEL: @test_vabsq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a 
to <16 x i8> -// CHECK: [[VABS1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.abs.v2i64(<2 x i64> %a) #2 +// CHECK: [[VABS1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.abs.v2i64(<2 x i64> %a) // CHECK: ret <2 x i64> [[VABS1_I]] int64x2_t test_vabsq_s64(int64x2_t a) { return vabsq_s64(a); @@ -1362,7 +1362,7 @@ int64x2_t test_vabsq_s64(int64x2_t a) { // CHECK-LABEL: @test_vabs_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) #2 +// CHECK: [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) // CHECK: ret <2 x float> [[VABS1_I]] float32x2_t test_vabs_f32(float32x2_t a) { return vabs_f32(a); @@ -1370,7 +1370,7 @@ float32x2_t test_vabs_f32(float32x2_t a) { // CHECK-LABEL: @test_vabsq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) #2 +// CHECK: [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) // CHECK: ret <4 x float> [[VABS1_I]] float32x4_t test_vabsq_f32(float32x4_t a) { return vabsq_f32(a); @@ -1378,21 +1378,21 @@ float32x4_t test_vabsq_f32(float32x4_t a) { // CHECK-LABEL: @test_vabsq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VABS1_I:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a) #2 +// CHECK: [[VABS1_I:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a) // CHECK: ret <2 x double> [[VABS1_I]] float64x2_t test_vabsq_f64(float64x2_t a) { return vabsq_f64(a); } // CHECK-LABEL: @test_vuqadd_s8( -// CHECK: [[VUQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #2 +// CHECK: [[VUQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VUQADD_I]] int8x8_t test_vuqadd_s8(int8x8_t a, int8x8_t b) { return vuqadd_s8(a, b); } // CHECK-LABEL: @test_vuqaddq_s8( -// CHECK: [[VUQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #2 +// CHECK: [[VUQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VUQADD_I]] int8x16_t test_vuqaddq_s8(int8x16_t a, int8x16_t b) { return vuqaddq_s8(a, b); @@ -1401,7 +1401,7 @@ int8x16_t test_vuqaddq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vuqadd_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VUQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b) #2 +// CHECK: [[VUQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: ret <4 x i16> [[VUQADD2_I]] int16x4_t test_vuqadd_s16(int16x4_t a, int16x4_t b) { return vuqadd_s16(a, b); @@ -1410,7 +1410,7 @@ int16x4_t test_vuqadd_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vuqaddq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VUQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b) #2 +// CHECK: [[VUQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: ret <8 x i16> [[VUQADD2_I]] int16x8_t test_vuqaddq_s16(int16x8_t a, int16x8_t b) { return vuqaddq_s16(a, b); @@ -1419,7 +1419,7 @@ int16x8_t test_vuqaddq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vuqadd_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to 
<8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VUQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b) #2 +// CHECK: [[VUQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: ret <2 x i32> [[VUQADD2_I]] int32x2_t test_vuqadd_s32(int32x2_t a, int32x2_t b) { return vuqadd_s32(a, b); @@ -1428,7 +1428,7 @@ int32x2_t test_vuqadd_s32(int32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vuqaddq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VUQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b) #2 +// CHECK: [[VUQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: ret <4 x i32> [[VUQADD2_I]] int32x4_t test_vuqaddq_s32(int32x4_t a, int32x4_t b) { return vuqaddq_s32(a, b); @@ -1437,21 +1437,21 @@ int32x4_t test_vuqaddq_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vuqaddq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VUQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b) #2 +// CHECK: [[VUQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: ret <2 x i64> [[VUQADD2_I]] int64x2_t test_vuqaddq_s64(int64x2_t a, int64x2_t b) { return vuqaddq_s64(a, b); } // CHECK-LABEL: @test_vcls_s8( -// CHECK: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.cls.v8i8(<8 x i8> %a) #2 +// CHECK: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.cls.v8i8(<8 x i8> %a) // CHECK: ret <8 x i8> [[VCLS_V_I]] int8x8_t test_vcls_s8(int8x8_t a) { return vcls_s8(a); } // CHECK-LABEL: @test_vclsq_s8( -// CHECK: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.cls.v16i8(<16 x i8> %a) #2 +// CHECK: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.cls.v16i8(<16 x i8> %a) // CHECK: ret <16 x i8> [[VCLSQ_V_I]] int8x16_t test_vclsq_s8(int8x16_t a) { return vclsq_s8(a); @@ -1459,7 +1459,7 @@ int8x16_t test_vclsq_s8(int8x16_t a) { // CHECK-LABEL: @test_vcls_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.cls.v4i16(<4 x i16> %a) #2 +// CHECK: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.cls.v4i16(<4 x i16> %a) // CHECK: [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VCLS_V1_I]] int16x4_t test_vcls_s16(int16x4_t a) { @@ -1468,7 +1468,7 @@ int16x4_t test_vcls_s16(int16x4_t a) { // CHECK-LABEL: @test_vclsq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.cls.v8i16(<8 x i16> %a) #2 +// CHECK: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.cls.v8i16(<8 x i16> %a) // CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VCLSQ_V1_I]] int16x8_t test_vclsq_s16(int16x8_t a) { @@ -1477,7 +1477,7 @@ int16x8_t test_vclsq_s16(int16x8_t a) { // CHECK-LABEL: @test_vcls_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.cls.v2i32(<2 x i32> %a) #2 +// CHECK: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.cls.v2i32(<2 x i32> %a) // CHECK: [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8> // CHECK: ret <2 x i32> 
[[VCLS_V1_I]] int32x2_t test_vcls_s32(int32x2_t a) { @@ -1486,7 +1486,7 @@ int32x2_t test_vcls_s32(int32x2_t a) { // CHECK-LABEL: @test_vclsq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.cls.v4i32(<4 x i32> %a) #2 +// CHECK: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.cls.v4i32(<4 x i32> %a) // CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VCLSQ_V1_I]] int32x4_t test_vclsq_s32(int32x4_t a) { @@ -1494,14 +1494,14 @@ int32x4_t test_vclsq_s32(int32x4_t a) { } // CHECK-LABEL: @test_vclz_s8( -// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #2 +// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) // CHECK: ret <8 x i8> [[VCLZ_V_I]] int8x8_t test_vclz_s8(int8x8_t a) { return vclz_s8(a); } // CHECK-LABEL: @test_vclzq_s8( -// CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #2 +// CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) // CHECK: ret <16 x i8> [[VCLZQ_V_I]] int8x16_t test_vclzq_s8(int8x16_t a) { return vclzq_s8(a); @@ -1509,7 +1509,7 @@ int8x16_t test_vclzq_s8(int8x16_t a) { // CHECK-LABEL: @test_vclz_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) #2 +// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VCLZ_V1_I]] int16x4_t test_vclz_s16(int16x4_t a) { @@ -1518,7 +1518,7 @@ int16x4_t test_vclz_s16(int16x4_t a) { // CHECK-LABEL: @test_vclzq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) #2 +// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) // CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VCLZQ_V1_I]] int16x8_t test_vclzq_s16(int16x8_t a) { @@ -1527,7 +1527,7 @@ int16x8_t test_vclzq_s16(int16x8_t a) { // CHECK-LABEL: @test_vclz_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) #2 +// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VCLZ_V1_I]] int32x2_t test_vclz_s32(int32x2_t a) { @@ -1536,7 +1536,7 @@ int32x2_t test_vclz_s32(int32x2_t a) { // CHECK-LABEL: @test_vclzq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) #2 +// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) // CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VCLZQ_V1_I]] int32x4_t test_vclzq_s32(int32x4_t a) { @@ -1544,14 +1544,14 @@ int32x4_t test_vclzq_s32(int32x4_t a) { } // CHECK-LABEL: @test_vclz_u8( -// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #2 +// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) // CHECK: ret <8 x i8> [[VCLZ_V_I]] uint8x8_t test_vclz_u8(uint8x8_t a) { return vclz_u8(a); } // CHECK-LABEL: @test_vclzq_u8( -// CHECK: 
[[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #2 +// CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) // CHECK: ret <16 x i8> [[VCLZQ_V_I]] uint8x16_t test_vclzq_u8(uint8x16_t a) { return vclzq_u8(a); @@ -1559,7 +1559,7 @@ uint8x16_t test_vclzq_u8(uint8x16_t a) { // CHECK-LABEL: @test_vclz_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) #2 +// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VCLZ_V1_I]] uint16x4_t test_vclz_u16(uint16x4_t a) { @@ -1568,7 +1568,7 @@ uint16x4_t test_vclz_u16(uint16x4_t a) { // CHECK-LABEL: @test_vclzq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) #2 +// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) // CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VCLZQ_V1_I]] uint16x8_t test_vclzq_u16(uint16x8_t a) { @@ -1577,7 +1577,7 @@ uint16x8_t test_vclzq_u16(uint16x8_t a) { // CHECK-LABEL: @test_vclz_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) #2 +// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VCLZ_V1_I]] uint32x2_t test_vclz_u32(uint32x2_t a) { @@ -1586,7 +1586,7 @@ uint32x2_t test_vclz_u32(uint32x2_t a) { // CHECK-LABEL: @test_vclzq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) #2 +// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) // CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VCLZQ_V1_I]] uint32x4_t test_vclzq_u32(uint32x4_t a) { @@ -1594,42 +1594,42 @@ uint32x4_t test_vclzq_u32(uint32x4_t a) { } // CHECK-LABEL: @test_vcnt_s8( -// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #2 +// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) // CHECK: ret <8 x i8> [[VCNT_V_I]] int8x8_t test_vcnt_s8(int8x8_t a) { return vcnt_s8(a); } // CHECK-LABEL: @test_vcntq_s8( -// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #2 +// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) // CHECK: ret <16 x i8> [[VCNTQ_V_I]] int8x16_t test_vcntq_s8(int8x16_t a) { return vcntq_s8(a); } // CHECK-LABEL: @test_vcnt_u8( -// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #2 +// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) // CHECK: ret <8 x i8> [[VCNT_V_I]] uint8x8_t test_vcnt_u8(uint8x8_t a) { return vcnt_u8(a); } // CHECK-LABEL: @test_vcntq_u8( -// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #2 +// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) // CHECK: ret <16 x i8> [[VCNTQ_V_I]] uint8x16_t test_vcntq_u8(uint8x16_t a) { return vcntq_u8(a); } // CHECK-LABEL: @test_vcnt_p8( -// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #2 +// CHECK: [[VCNT_V_I:%.*]] 
= call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) // CHECK: ret <8 x i8> [[VCNT_V_I]] poly8x8_t test_vcnt_p8(poly8x8_t a) { return vcnt_p8(a); } // CHECK-LABEL: @test_vcntq_p8( -// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #2 +// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) // CHECK: ret <16 x i8> [[VCNTQ_V_I]] poly8x16_t test_vcntq_p8(poly8x16_t a) { return vcntq_p8(a); @@ -1734,42 +1734,42 @@ poly8x16_t test_vmvnq_p8(poly8x16_t a) { } // CHECK-LABEL: @test_vrbit_s8( -// CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a) #2 +// CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a) // CHECK: ret <8 x i8> [[VRBIT_I]] int8x8_t test_vrbit_s8(int8x8_t a) { return vrbit_s8(a); } // CHECK-LABEL: @test_vrbitq_s8( -// CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a) #2 +// CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a) // CHECK: ret <16 x i8> [[VRBIT_I]] int8x16_t test_vrbitq_s8(int8x16_t a) { return vrbitq_s8(a); } // CHECK-LABEL: @test_vrbit_u8( -// CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a) #2 +// CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a) // CHECK: ret <8 x i8> [[VRBIT_I]] uint8x8_t test_vrbit_u8(uint8x8_t a) { return vrbit_u8(a); } // CHECK-LABEL: @test_vrbitq_u8( -// CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a) #2 +// CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a) // CHECK: ret <16 x i8> [[VRBIT_I]] uint8x16_t test_vrbitq_u8(uint8x16_t a) { return vrbitq_u8(a); } // CHECK-LABEL: @test_vrbit_p8( -// CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a) #2 +// CHECK: [[VRBIT_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a) // CHECK: ret <8 x i8> [[VRBIT_I]] poly8x8_t test_vrbit_p8(poly8x8_t a) { return vrbit_p8(a); } // CHECK-LABEL: @test_vrbitq_p8( -// CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a) #2 +// CHECK: [[VRBIT_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a) // CHECK: ret <16 x i8> [[VRBIT_I]] poly8x16_t test_vrbitq_p8(poly8x16_t a) { return vrbitq_p8(a); @@ -1879,7 +1879,7 @@ int32x4_t test_vmovn_high_u64(int32x2_t a, int64x2_t b) { // CHECK-LABEL: @test_vqmovun_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> %a) #2 +// CHECK: [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> %a) // CHECK: ret <8 x i8> [[VQMOVUN_V1_I]] int8x8_t test_vqmovun_s16(int16x8_t a) { return vqmovun_s16(a); @@ -1887,7 +1887,7 @@ int8x8_t test_vqmovun_s16(int16x8_t a) { // CHECK-LABEL: @test_vqmovun_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %a) #2 +// CHECK: [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %a) // CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQMOVUN_V1_I]] int16x4_t test_vqmovun_s32(int32x4_t a) { @@ -1896,7 +1896,7 @@ int16x4_t test_vqmovun_s32(int32x4_t a) { // CHECK-LABEL: @test_vqmovun_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[VQMOVUN_V1_I:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> %a) #2 +// CHECK: [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> %a) // CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQMOVUN_V1_I]] int32x2_t test_vqmovun_s64(int64x2_t a) { @@ -1905,7 +1905,7 @@ int32x2_t test_vqmovun_s64(int64x2_t a) { // CHECK-LABEL: @test_vqmovun_high_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQMOVUN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> %b) #2 +// CHECK: [[VQMOVUN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> %b) // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQMOVUN_V1_I_I]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] int8x16_t test_vqmovun_high_s16(int8x8_t a, int16x8_t b) { @@ -1914,7 +1914,7 @@ int8x16_t test_vqmovun_high_s16(int8x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vqmovun_high_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQMOVUN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %b) #2 +// CHECK: [[VQMOVUN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %b) // CHECK: [[VQMOVUN_V2_I_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I_I]] to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQMOVUN_V1_I_I]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] @@ -1924,7 +1924,7 @@ int16x8_t test_vqmovun_high_s32(int16x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vqmovun_high_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQMOVUN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> %b) #2 +// CHECK: [[VQMOVUN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> %b) // CHECK: [[VQMOVUN_V2_I_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I_I]] to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQMOVUN_V1_I_I]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] @@ -1934,7 +1934,7 @@ int32x4_t test_vqmovun_high_s64(int32x2_t a, int64x2_t b) { // CHECK-LABEL: @test_vqmovn_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> %a) #2 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> %a) // CHECK: ret <8 x i8> [[VQMOVN_V1_I]] int8x8_t test_vqmovn_s16(int16x8_t a) { return vqmovn_s16(a); @@ -1942,7 +1942,7 @@ int8x8_t test_vqmovn_s16(int16x8_t a) { // CHECK-LABEL: @test_vqmovn_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %a) #2 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %a) // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQMOVN_V1_I]] int16x4_t test_vqmovn_s32(int32x4_t a) { @@ -1951,7 +1951,7 @@ int16x4_t test_vqmovn_s32(int32x4_t a) { // CHECK-LABEL: @test_vqmovn_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> %a) #2 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> %a) // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQMOVN_V1_I]] int32x2_t test_vqmovn_s64(int64x2_t a) { @@ -1960,7 +1960,7 @@ 
int32x2_t test_vqmovn_s64(int64x2_t a) { // CHECK-LABEL: @test_vqmovn_high_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> %b) #2 +// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> %b) // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQMOVN_V1_I_I]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] int8x16_t test_vqmovn_high_s16(int8x8_t a, int16x8_t b) { @@ -1969,7 +1969,7 @@ int8x16_t test_vqmovn_high_s16(int8x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vqmovn_high_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %b) #2 +// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %b) // CHECK: [[VQMOVN_V2_I_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I_I]] to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQMOVN_V1_I_I]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] @@ -1979,7 +1979,7 @@ int16x8_t test_vqmovn_high_s32(int16x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vqmovn_high_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> %b) #2 +// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> %b) // CHECK: [[VQMOVN_V2_I_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I_I]] to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQMOVN_V1_I_I]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] @@ -1989,7 +1989,7 @@ int32x4_t test_vqmovn_high_s64(int32x2_t a, int64x2_t b) { // CHECK-LABEL: @test_vqmovn_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> %a) #2 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> %a) // CHECK: ret <8 x i8> [[VQMOVN_V1_I]] uint8x8_t test_vqmovn_u16(uint16x8_t a) { return vqmovn_u16(a); @@ -1997,7 +1997,7 @@ uint8x8_t test_vqmovn_u16(uint16x8_t a) { // CHECK-LABEL: @test_vqmovn_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> %a) #2 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> %a) // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQMOVN_V1_I]] uint16x4_t test_vqmovn_u32(uint32x4_t a) { @@ -2006,7 +2006,7 @@ uint16x4_t test_vqmovn_u32(uint32x4_t a) { // CHECK-LABEL: @test_vqmovn_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> %a) #2 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> %a) // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQMOVN_V1_I]] uint32x2_t test_vqmovn_u64(uint64x2_t a) { @@ -2015,7 +2015,7 @@ uint32x2_t test_vqmovn_u64(uint64x2_t a) { // CHECK-LABEL: @test_vqmovn_high_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> %b) #2 +// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> %b) // CHECK: 
[[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQMOVN_V1_I_I]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] uint8x16_t test_vqmovn_high_u16(uint8x8_t a, uint16x8_t b) { @@ -2024,7 +2024,7 @@ uint8x16_t test_vqmovn_high_u16(uint8x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vqmovn_high_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> %b) #2 +// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> %b) // CHECK: [[VQMOVN_V2_I_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I_I]] to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQMOVN_V1_I_I]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] @@ -2034,7 +2034,7 @@ uint16x8_t test_vqmovn_high_u32(uint16x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vqmovn_high_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> %b) #2 +// CHECK: [[VQMOVN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> %b) // CHECK: [[VQMOVN_V2_I_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I_I]] to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQMOVN_V1_I_I]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] @@ -2162,7 +2162,7 @@ uint64x2_t test_vshll_high_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vcvt_f16_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> %a) #2 +// CHECK: [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> %a) // CHECK: [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x half> // CHECK: ret <4 x half> [[TMP1]] @@ -2172,7 +2172,7 @@ float16x4_t test_vcvt_f16_f32(float32x4_t a) { // CHECK-LABEL: @test_vcvt_high_f16_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VCVT_F16_F321_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> %b) #2 +// CHECK: [[VCVT_F16_F321_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> %b) // CHECK: [[VCVT_F16_F322_I_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I_I]] to <4 x half> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x half> %a, <4 x half> [[TMP1]], <8 x i32> @@ -2200,7 +2200,7 @@ float32x4_t test_vcvt_high_f32_f64(float32x2_t a, float64x2_t b) { // CHECK-LABEL: @test_vcvtx_f32_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VCVTX_F32_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %a) #2 +// CHECK: [[VCVTX_F32_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %a) // CHECK: ret <2 x float> [[VCVTX_F32_V1_I]] float32x2_t test_vcvtx_f32_f64(float64x2_t a) { return vcvtx_f32_f64(a); @@ -2208,7 +2208,7 @@ float32x2_t test_vcvtx_f32_f64(float64x2_t a) { // CHECK-LABEL: @test_vcvtx_high_f32_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[VCVTX_F32_V1_I_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %b) #2 +// CHECK: [[VCVTX_F32_V1_I_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %b) // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x float> 
%a, <2 x float> [[VCVTX_F32_V1_I_I]], <4 x i32> // CHECK: ret <4 x float> [[SHUFFLE_I_I]] float32x4_t test_vcvtx_high_f32_f64(float32x2_t a, float64x2_t b) { @@ -2218,7 +2218,7 @@ float32x4_t test_vcvtx_high_f32_f64(float32x2_t a, float64x2_t b) { // CHECK-LABEL: @test_vcvt_f32_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> // CHECK: [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]]) #2 +// CHECK: [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]]) // CHECK: [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I]] to <16 x i8> // CHECK: ret <4 x float> [[VCVT_F32_F161_I]] float32x4_t test_vcvt_f32_f16(float16x4_t a) { @@ -2229,7 +2229,7 @@ float32x4_t test_vcvt_f32_f16(float16x4_t a) { // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[SHUFFLE_I_I]] to <8 x i8> // CHECK: [[VCVT_F32_F16_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VCVT_F32_F161_I_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I_I]]) #2 +// CHECK: [[VCVT_F32_F161_I_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I_I]]) // CHECK: [[VCVT_F32_F162_I_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I_I]] to <16 x i8> // CHECK: ret <4 x float> [[VCVT_F32_F161_I_I]] float32x4_t test_vcvt_high_f32_f16(float16x8_t a) { @@ -2255,7 +2255,7 @@ float64x2_t test_vcvt_high_f64_f32(float32x4_t a) { // CHECK-LABEL: @test_vrndn_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDN1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %a) #2 +// CHECK: [[VRNDN1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %a) // CHECK: ret <2 x float> [[VRNDN1_I]] float32x2_t test_vrndn_f32(float32x2_t a) { return vrndn_f32(a); @@ -2263,7 +2263,7 @@ float32x2_t test_vrndn_f32(float32x2_t a) { // CHECK-LABEL: @test_vrndnq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDN1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %a) #2 +// CHECK: [[VRNDN1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %a) // CHECK: ret <4 x float> [[VRNDN1_I]] float32x4_t test_vrndnq_f32(float32x4_t a) { return vrndnq_f32(a); @@ -2271,7 +2271,7 @@ float32x4_t test_vrndnq_f32(float32x4_t a) { // CHECK-LABEL: @test_vrndnq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VRNDN1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %a) #2 +// CHECK: [[VRNDN1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %a) // CHECK: ret <2 x double> [[VRNDN1_I]] float64x2_t test_vrndnq_f64(float64x2_t a) { return vrndnq_f64(a); @@ -2279,7 +2279,7 @@ float64x2_t test_vrndnq_f64(float64x2_t a) { // CHECK-LABEL: @test_vrnda_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDA1_I:%.*]] = call <2 x float> @llvm.round.v2f32(<2 x float> %a) #2 +// CHECK: [[VRNDA1_I:%.*]] = call <2 x float> @llvm.round.v2f32(<2 x float> %a) // CHECK: ret <2 x float> [[VRNDA1_I]] float32x2_t test_vrnda_f32(float32x2_t a) { return vrnda_f32(a); @@ -2287,7 +2287,7 @@ float32x2_t test_vrnda_f32(float32x2_t a) { // CHECK-LABEL: @test_vrndaq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: 
[[VRNDA1_I:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> %a) #2 +// CHECK: [[VRNDA1_I:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> %a) // CHECK: ret <4 x float> [[VRNDA1_I]] float32x4_t test_vrndaq_f32(float32x4_t a) { return vrndaq_f32(a); @@ -2295,7 +2295,7 @@ float32x4_t test_vrndaq_f32(float32x4_t a) { // CHECK-LABEL: @test_vrndaq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VRNDA1_I:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> %a) #2 +// CHECK: [[VRNDA1_I:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> %a) // CHECK: ret <2 x double> [[VRNDA1_I]] float64x2_t test_vrndaq_f64(float64x2_t a) { return vrndaq_f64(a); @@ -2303,7 +2303,7 @@ float64x2_t test_vrndaq_f64(float64x2_t a) { // CHECK-LABEL: @test_vrndp_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDP1_I:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> %a) #2 +// CHECK: [[VRNDP1_I:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> %a) // CHECK: ret <2 x float> [[VRNDP1_I]] float32x2_t test_vrndp_f32(float32x2_t a) { return vrndp_f32(a); @@ -2311,7 +2311,7 @@ float32x2_t test_vrndp_f32(float32x2_t a) { // CHECK-LABEL: @test_vrndpq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDP1_I:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) #2 +// CHECK: [[VRNDP1_I:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) // CHECK: ret <4 x float> [[VRNDP1_I]] float32x4_t test_vrndpq_f32(float32x4_t a) { return vrndpq_f32(a); @@ -2319,7 +2319,7 @@ float32x4_t test_vrndpq_f32(float32x4_t a) { // CHECK-LABEL: @test_vrndpq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VRNDP1_I:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a) #2 +// CHECK: [[VRNDP1_I:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a) // CHECK: ret <2 x double> [[VRNDP1_I]] float64x2_t test_vrndpq_f64(float64x2_t a) { return vrndpq_f64(a); @@ -2327,7 +2327,7 @@ float64x2_t test_vrndpq_f64(float64x2_t a) { // CHECK-LABEL: @test_vrndm_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDM1_I:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> %a) #2 +// CHECK: [[VRNDM1_I:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> %a) // CHECK: ret <2 x float> [[VRNDM1_I]] float32x2_t test_vrndm_f32(float32x2_t a) { return vrndm_f32(a); @@ -2335,7 +2335,7 @@ float32x2_t test_vrndm_f32(float32x2_t a) { // CHECK-LABEL: @test_vrndmq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDM1_I:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> %a) #2 +// CHECK: [[VRNDM1_I:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> %a) // CHECK: ret <4 x float> [[VRNDM1_I]] float32x4_t test_vrndmq_f32(float32x4_t a) { return vrndmq_f32(a); @@ -2343,7 +2343,7 @@ float32x4_t test_vrndmq_f32(float32x4_t a) { // CHECK-LABEL: @test_vrndmq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VRNDM1_I:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> %a) #2 +// CHECK: [[VRNDM1_I:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> %a) // CHECK: ret <2 x double> [[VRNDM1_I]] float64x2_t test_vrndmq_f64(float64x2_t a) { return vrndmq_f64(a); @@ -2351,7 +2351,7 @@ float64x2_t test_vrndmq_f64(float64x2_t a) { // CHECK-LABEL: @test_vrndx_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDX1_I:%.*]] = call <2 x float> @llvm.rint.v2f32(<2 x float> %a) #2 +// CHECK: 
[[VRNDX1_I:%.*]] = call <2 x float> @llvm.rint.v2f32(<2 x float> %a) // CHECK: ret <2 x float> [[VRNDX1_I]] float32x2_t test_vrndx_f32(float32x2_t a) { return vrndx_f32(a); @@ -2359,7 +2359,7 @@ float32x2_t test_vrndx_f32(float32x2_t a) { // CHECK-LABEL: @test_vrndxq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDX1_I:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> %a) #2 +// CHECK: [[VRNDX1_I:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> %a) // CHECK: ret <4 x float> [[VRNDX1_I]] float32x4_t test_vrndxq_f32(float32x4_t a) { return vrndxq_f32(a); @@ -2367,7 +2367,7 @@ float32x4_t test_vrndxq_f32(float32x4_t a) { // CHECK-LABEL: @test_vrndxq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VRNDX1_I:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> %a) #2 +// CHECK: [[VRNDX1_I:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> %a) // CHECK: ret <2 x double> [[VRNDX1_I]] float64x2_t test_vrndxq_f64(float64x2_t a) { return vrndxq_f64(a); @@ -2375,7 +2375,7 @@ float64x2_t test_vrndxq_f64(float64x2_t a) { // CHECK-LABEL: @test_vrnd_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDZ1_I:%.*]] = call <2 x float> @llvm.trunc.v2f32(<2 x float> %a) #2 +// CHECK: [[VRNDZ1_I:%.*]] = call <2 x float> @llvm.trunc.v2f32(<2 x float> %a) // CHECK: ret <2 x float> [[VRNDZ1_I]] float32x2_t test_vrnd_f32(float32x2_t a) { return vrnd_f32(a); @@ -2383,7 +2383,7 @@ float32x2_t test_vrnd_f32(float32x2_t a) { // CHECK-LABEL: @test_vrndq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDZ1_I:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a) #2 +// CHECK: [[VRNDZ1_I:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a) // CHECK: ret <4 x float> [[VRNDZ1_I]] float32x4_t test_vrndq_f32(float32x4_t a) { return vrndq_f32(a); @@ -2391,7 +2391,7 @@ float32x4_t test_vrndq_f32(float32x4_t a) { // CHECK-LABEL: @test_vrndq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VRNDZ1_I:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> %a) #2 +// CHECK: [[VRNDZ1_I:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> %a) // CHECK: ret <2 x double> [[VRNDZ1_I]] float64x2_t test_vrndq_f64(float64x2_t a) { return vrndq_f64(a); @@ -2399,7 +2399,7 @@ float64x2_t test_vrndq_f64(float64x2_t a) { // CHECK-LABEL: @test_vrndi_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRNDI1_I:%.*]] = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %a) #2 +// CHECK: [[VRNDI1_I:%.*]] = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %a) // CHECK: ret <2 x float> [[VRNDI1_I]] float32x2_t test_vrndi_f32(float32x2_t a) { return vrndi_f32(a); @@ -2407,7 +2407,7 @@ float32x2_t test_vrndi_f32(float32x2_t a) { // CHECK-LABEL: @test_vrndiq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRNDI1_I:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a) #2 +// CHECK: [[VRNDI1_I:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a) // CHECK: ret <4 x float> [[VRNDI1_I]] float32x4_t test_vrndiq_f32(float32x4_t a) { return vrndiq_f32(a); @@ -2415,7 +2415,7 @@ float32x4_t test_vrndiq_f32(float32x4_t a) { // CHECK-LABEL: @test_vrndiq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VRNDI1_I:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a) #2 +// CHECK: [[VRNDI1_I:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a) // CHECK: 
ret <2 x double> [[VRNDI1_I]] float64x2_t test_vrndiq_f64(float64x2_t a) { return vrndiq_f64(a); @@ -2471,7 +2471,7 @@ uint64x2_t test_vcvtq_u64_f64(float64x2_t a) { // CHECK-LABEL: @test_vcvtn_s32_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VCVTN1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float> %a) #2 +// CHECK: [[VCVTN1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float> %a) // CHECK: ret <2 x i32> [[VCVTN1_I]] int32x2_t test_vcvtn_s32_f32(float32x2_t a) { return vcvtn_s32_f32(a); @@ -2479,7 +2479,7 @@ int32x2_t test_vcvtn_s32_f32(float32x2_t a) { // CHECK-LABEL: @test_vcvtnq_s32_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VCVTN1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float> %a) #2 +// CHECK: [[VCVTN1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float> %a) // CHECK: ret <4 x i32> [[VCVTN1_I]] int32x4_t test_vcvtnq_s32_f32(float32x4_t a) { return vcvtnq_s32_f32(a); @@ -2487,7 +2487,7 @@ int32x4_t test_vcvtnq_s32_f32(float32x4_t a) { // CHECK-LABEL: @test_vcvtnq_s64_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VCVTN1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double> %a) #2 +// CHECK: [[VCVTN1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double> %a) // CHECK: ret <2 x i64> [[VCVTN1_I]] int64x2_t test_vcvtnq_s64_f64(float64x2_t a) { return vcvtnq_s64_f64(a); @@ -2495,7 +2495,7 @@ int64x2_t test_vcvtnq_s64_f64(float64x2_t a) { // CHECK-LABEL: @test_vcvtn_u32_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VCVTN1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float> %a) #2 +// CHECK: [[VCVTN1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float> %a) // CHECK: ret <2 x i32> [[VCVTN1_I]] uint32x2_t test_vcvtn_u32_f32(float32x2_t a) { return vcvtn_u32_f32(a); @@ -2503,7 +2503,7 @@ uint32x2_t test_vcvtn_u32_f32(float32x2_t a) { // CHECK-LABEL: @test_vcvtnq_u32_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VCVTN1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float> %a) #2 +// CHECK: [[VCVTN1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float> %a) // CHECK: ret <4 x i32> [[VCVTN1_I]] uint32x4_t test_vcvtnq_u32_f32(float32x4_t a) { return vcvtnq_u32_f32(a); @@ -2511,7 +2511,7 @@ uint32x4_t test_vcvtnq_u32_f32(float32x4_t a) { // CHECK-LABEL: @test_vcvtnq_u64_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VCVTN1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double> %a) #2 +// CHECK: [[VCVTN1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double> %a) // CHECK: ret <2 x i64> [[VCVTN1_I]] uint64x2_t test_vcvtnq_u64_f64(float64x2_t a) { return vcvtnq_u64_f64(a); @@ -2519,7 +2519,7 @@ uint64x2_t test_vcvtnq_u64_f64(float64x2_t a) { // CHECK-LABEL: @test_vcvtp_s32_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VCVTP1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float> %a) #2 +// CHECK: [[VCVTP1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float> %a) // CHECK: ret <2 x i32> [[VCVTP1_I]] int32x2_t test_vcvtp_s32_f32(float32x2_t a) { return vcvtp_s32_f32(a); @@ -2527,7 +2527,7 @@ int32x2_t test_vcvtp_s32_f32(float32x2_t a) { // CHECK-LABEL: @test_vcvtpq_s32_f32( 
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VCVTP1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float> %a) #2 +// CHECK: [[VCVTP1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float> %a) // CHECK: ret <4 x i32> [[VCVTP1_I]] int32x4_t test_vcvtpq_s32_f32(float32x4_t a) { return vcvtpq_s32_f32(a); @@ -2535,7 +2535,7 @@ int32x4_t test_vcvtpq_s32_f32(float32x4_t a) { // CHECK-LABEL: @test_vcvtpq_s64_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VCVTP1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double> %a) #2 +// CHECK: [[VCVTP1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double> %a) // CHECK: ret <2 x i64> [[VCVTP1_I]] int64x2_t test_vcvtpq_s64_f64(float64x2_t a) { return vcvtpq_s64_f64(a); @@ -2543,7 +2543,7 @@ int64x2_t test_vcvtpq_s64_f64(float64x2_t a) { // CHECK-LABEL: @test_vcvtp_u32_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VCVTP1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float> %a) #2 +// CHECK: [[VCVTP1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float> %a) // CHECK: ret <2 x i32> [[VCVTP1_I]] uint32x2_t test_vcvtp_u32_f32(float32x2_t a) { return vcvtp_u32_f32(a); @@ -2551,7 +2551,7 @@ uint32x2_t test_vcvtp_u32_f32(float32x2_t a) { // CHECK-LABEL: @test_vcvtpq_u32_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VCVTP1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float> %a) #2 +// CHECK: [[VCVTP1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float> %a) // CHECK: ret <4 x i32> [[VCVTP1_I]] uint32x4_t test_vcvtpq_u32_f32(float32x4_t a) { return vcvtpq_u32_f32(a); @@ -2559,7 +2559,7 @@ uint32x4_t test_vcvtpq_u32_f32(float32x4_t a) { // CHECK-LABEL: @test_vcvtpq_u64_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VCVTP1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double> %a) #2 +// CHECK: [[VCVTP1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double> %a) // CHECK: ret <2 x i64> [[VCVTP1_I]] uint64x2_t test_vcvtpq_u64_f64(float64x2_t a) { return vcvtpq_u64_f64(a); @@ -2567,7 +2567,7 @@ uint64x2_t test_vcvtpq_u64_f64(float64x2_t a) { // CHECK-LABEL: @test_vcvtm_s32_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VCVTM1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float> %a) #2 +// CHECK: [[VCVTM1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float> %a) // CHECK: ret <2 x i32> [[VCVTM1_I]] int32x2_t test_vcvtm_s32_f32(float32x2_t a) { return vcvtm_s32_f32(a); @@ -2575,7 +2575,7 @@ int32x2_t test_vcvtm_s32_f32(float32x2_t a) { // CHECK-LABEL: @test_vcvtmq_s32_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VCVTM1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float> %a) #2 +// CHECK: [[VCVTM1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float> %a) // CHECK: ret <4 x i32> [[VCVTM1_I]] int32x4_t test_vcvtmq_s32_f32(float32x4_t a) { return vcvtmq_s32_f32(a); @@ -2583,7 +2583,7 @@ int32x4_t test_vcvtmq_s32_f32(float32x4_t a) { // CHECK-LABEL: @test_vcvtmq_s64_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VCVTM1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double> %a) #2 +// CHECK: [[VCVTM1_I:%.*]] = call 
<2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double> %a) // CHECK: ret <2 x i64> [[VCVTM1_I]] int64x2_t test_vcvtmq_s64_f64(float64x2_t a) { return vcvtmq_s64_f64(a); @@ -2591,7 +2591,7 @@ int64x2_t test_vcvtmq_s64_f64(float64x2_t a) { // CHECK-LABEL: @test_vcvtm_u32_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VCVTM1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float> %a) #2 +// CHECK: [[VCVTM1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float> %a) // CHECK: ret <2 x i32> [[VCVTM1_I]] uint32x2_t test_vcvtm_u32_f32(float32x2_t a) { return vcvtm_u32_f32(a); @@ -2599,7 +2599,7 @@ uint32x2_t test_vcvtm_u32_f32(float32x2_t a) { // CHECK-LABEL: @test_vcvtmq_u32_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VCVTM1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float> %a) #2 +// CHECK: [[VCVTM1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float> %a) // CHECK: ret <4 x i32> [[VCVTM1_I]] uint32x4_t test_vcvtmq_u32_f32(float32x4_t a) { return vcvtmq_u32_f32(a); @@ -2607,7 +2607,7 @@ uint32x4_t test_vcvtmq_u32_f32(float32x4_t a) { // CHECK-LABEL: @test_vcvtmq_u64_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VCVTM1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double> %a) #2 +// CHECK: [[VCVTM1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double> %a) // CHECK: ret <2 x i64> [[VCVTM1_I]] uint64x2_t test_vcvtmq_u64_f64(float64x2_t a) { return vcvtmq_u64_f64(a); @@ -2615,7 +2615,7 @@ uint64x2_t test_vcvtmq_u64_f64(float64x2_t a) { // CHECK-LABEL: @test_vcvta_s32_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VCVTA1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float> %a) #2 +// CHECK: [[VCVTA1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float> %a) // CHECK: ret <2 x i32> [[VCVTA1_I]] int32x2_t test_vcvta_s32_f32(float32x2_t a) { return vcvta_s32_f32(a); @@ -2623,7 +2623,7 @@ int32x2_t test_vcvta_s32_f32(float32x2_t a) { // CHECK-LABEL: @test_vcvtaq_s32_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VCVTA1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float> %a) #2 +// CHECK: [[VCVTA1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float> %a) // CHECK: ret <4 x i32> [[VCVTA1_I]] int32x4_t test_vcvtaq_s32_f32(float32x4_t a) { return vcvtaq_s32_f32(a); @@ -2631,7 +2631,7 @@ int32x4_t test_vcvtaq_s32_f32(float32x4_t a) { // CHECK-LABEL: @test_vcvtaq_s64_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VCVTA1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double> %a) #2 +// CHECK: [[VCVTA1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double> %a) // CHECK: ret <2 x i64> [[VCVTA1_I]] int64x2_t test_vcvtaq_s64_f64(float64x2_t a) { return vcvtaq_s64_f64(a); @@ -2639,7 +2639,7 @@ int64x2_t test_vcvtaq_s64_f64(float64x2_t a) { // CHECK-LABEL: @test_vcvta_u32_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VCVTA1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float> %a) #2 +// CHECK: [[VCVTA1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float> %a) // CHECK: ret <2 x i32> [[VCVTA1_I]] uint32x2_t test_vcvta_u32_f32(float32x2_t a) { return vcvta_u32_f32(a); @@ -2647,7 +2647,7 @@ 
uint32x2_t test_vcvta_u32_f32(float32x2_t a) { // CHECK-LABEL: @test_vcvtaq_u32_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VCVTA1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float> %a) #2 +// CHECK: [[VCVTA1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float> %a) // CHECK: ret <4 x i32> [[VCVTA1_I]] uint32x4_t test_vcvtaq_u32_f32(float32x4_t a) { return vcvtaq_u32_f32(a); @@ -2655,7 +2655,7 @@ uint32x4_t test_vcvtaq_u32_f32(float32x4_t a) { // CHECK-LABEL: @test_vcvtaq_u64_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VCVTA1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double> %a) #2 +// CHECK: [[VCVTA1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double> %a) // CHECK: ret <2 x i64> [[VCVTA1_I]] uint64x2_t test_vcvtaq_u64_f64(float64x2_t a) { return vcvtaq_u64_f64(a); @@ -2663,7 +2663,7 @@ uint64x2_t test_vcvtaq_u64_f64(float64x2_t a) { // CHECK-LABEL: @test_vrsqrte_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float> %a) #2 +// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float> %a) // CHECK: ret <2 x float> [[VRSQRTE_V1_I]] float32x2_t test_vrsqrte_f32(float32x2_t a) { return vrsqrte_f32(a); @@ -2671,7 +2671,7 @@ float32x2_t test_vrsqrte_f32(float32x2_t a) { // CHECK-LABEL: @test_vrsqrteq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float> %a) #2 +// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float> %a) // CHECK: ret <4 x float> [[VRSQRTEQ_V1_I]] float32x4_t test_vrsqrteq_f32(float32x4_t a) { return vrsqrteq_f32(a); @@ -2679,7 +2679,7 @@ float32x4_t test_vrsqrteq_f32(float32x4_t a) { // CHECK-LABEL: @test_vrsqrteq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double> %a) #2 +// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double> %a) // CHECK: ret <2 x double> [[VRSQRTEQ_V1_I]] float64x2_t test_vrsqrteq_f64(float64x2_t a) { return vrsqrteq_f64(a); @@ -2687,7 +2687,7 @@ float64x2_t test_vrsqrteq_f64(float64x2_t a) { // CHECK-LABEL: @test_vrecpe_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float> %a) #2 +// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float> %a) // CHECK: ret <2 x float> [[VRECPE_V1_I]] float32x2_t test_vrecpe_f32(float32x2_t a) { return vrecpe_f32(a); @@ -2695,7 +2695,7 @@ float32x2_t test_vrecpe_f32(float32x2_t a) { // CHECK-LABEL: @test_vrecpeq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float> %a) #2 +// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float> %a) // CHECK: ret <4 x float> [[VRECPEQ_V1_I]] float32x4_t test_vrecpeq_f32(float32x4_t a) { return vrecpeq_f32(a); @@ -2703,7 +2703,7 @@ float32x4_t test_vrecpeq_f32(float32x4_t a) { // CHECK-LABEL: @test_vrecpeq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VRECPEQ_V1_I:%.*]] = call <2 x 
double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double> %a) #2 +// CHECK: [[VRECPEQ_V1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double> %a) // CHECK: ret <2 x double> [[VRECPEQ_V1_I]] float64x2_t test_vrecpeq_f64(float64x2_t a) { return vrecpeq_f64(a); @@ -2711,7 +2711,7 @@ float64x2_t test_vrecpeq_f64(float64x2_t a) { // CHECK-LABEL: @test_vrecpe_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32> %a) #2 +// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32> %a) // CHECK: ret <2 x i32> [[VRECPE_V1_I]] uint32x2_t test_vrecpe_u32(uint32x2_t a) { return vrecpe_u32(a); @@ -2719,7 +2719,7 @@ uint32x2_t test_vrecpe_u32(uint32x2_t a) { // CHECK-LABEL: @test_vrecpeq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32> %a) #2 +// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32> %a) // CHECK: ret <4 x i32> [[VRECPEQ_V1_I]] uint32x4_t test_vrecpeq_u32(uint32x4_t a) { return vrecpeq_u32(a); @@ -2727,7 +2727,7 @@ uint32x4_t test_vrecpeq_u32(uint32x4_t a) { // CHECK-LABEL: @test_vsqrt_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VSQRT_I:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #2 +// CHECK: [[VSQRT_I:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) // CHECK: ret <2 x float> [[VSQRT_I]] float32x2_t test_vsqrt_f32(float32x2_t a) { return vsqrt_f32(a); @@ -2735,7 +2735,7 @@ float32x2_t test_vsqrt_f32(float32x2_t a) { // CHECK-LABEL: @test_vsqrtq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VSQRT_I:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #2 +// CHECK: [[VSQRT_I:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) // CHECK: ret <4 x float> [[VSQRT_I]] float32x4_t test_vsqrtq_f32(float32x4_t a) { return vsqrtq_f32(a); @@ -2743,7 +2743,7 @@ float32x4_t test_vsqrtq_f32(float32x4_t a) { // CHECK-LABEL: @test_vsqrtq_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[VSQRT_I:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #2 +// CHECK: [[VSQRT_I:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) // CHECK: ret <2 x double> [[VSQRT_I]] float64x2_t test_vsqrtq_f64(float64x2_t a) { return vsqrtq_f64(a); diff --git a/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c b/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c index ac5a090fd2e6d..01296c9359331 100644 --- a/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c +++ b/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c @@ -59,7 +59,7 @@ float64x1_t test_vmul_n_f64(float64x1_t a, float64_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 -// CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float [[VGET_LANE]]) #2 +// CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float [[VGET_LANE]]) // CHECK: ret float [[VMULXS_F32_I]] float32_t test_vmulxs_lane_f32(float32_t a, float32x2_t b) { return vmulxs_lane_f32(a, b, 1); @@ -69,7 +69,7 @@ float32_t test_vmulxs_lane_f32(float32_t a, float32x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] 
to <4 x float> // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 -// CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float [[VGETQ_LANE]]) #2 +// CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float [[VGETQ_LANE]]) // CHECK: ret float [[VMULXS_F32_I]] float32_t test_vmulxs_laneq_f32(float32_t a, float32x4_t b) { return vmulxs_laneq_f32(a, b, 3); @@ -79,7 +79,7 @@ float32_t test_vmulxs_laneq_f32(float32_t a, float32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0 -// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double [[VGET_LANE]]) #2 +// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double [[VGET_LANE]]) // CHECK: ret double [[VMULXD_F64_I]] float64_t test_vmulxd_lane_f64(float64_t a, float64x1_t b) { return vmulxd_lane_f64(a, b, 0); @@ -89,7 +89,7 @@ float64_t test_vmulxd_lane_f64(float64_t a, float64x1_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 -// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double [[VGETQ_LANE]]) #2 +// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double [[VGETQ_LANE]]) // CHECK: ret double [[VMULXD_F64_I]] float64_t test_vmulxd_laneq_f64(float64_t a, float64x2_t b) { return vmulxd_laneq_f64(a, b, 1); @@ -102,7 +102,7 @@ float64_t test_vmulxd_laneq_f64(float64_t a, float64x2_t b) { // CHECK: [[TMP2:%.*]] = bitcast <1 x double> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double> // CHECK: [[VGET_LANE6:%.*]] = extractelement <1 x double> [[TMP3]], i32 0 -// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE6]]) #2 +// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE6]]) // CHECK: [[TMP4:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> // CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP5]], double [[VMULXD_F64_I]], i32 0 @@ -119,7 +119,7 @@ float64x1_t test_vmulx_lane_f64(float64x1_t a, float64x1_t b) { // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP3]], i32 0 -// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]]) #2 +// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]]) // CHECK: [[TMP4:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> // CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP5]], double [[VMULXD_F64_I]], i32 0 @@ -135,7 +135,7 @@ float64x1_t test_vmulx_laneq_f64_0(float64x1_t a, float64x2_t b) { // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP3]], i32 1 -// CHECK: [[VMULXD_F64_I:%.*]] = call 
double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]]) #2 +// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]]) // CHECK: [[TMP4:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> // CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP5]], double [[VMULXD_F64_I]], i32 0 @@ -252,7 +252,7 @@ float64x1_t test_vfms_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) { // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0 -// CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) #2 +// CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) // CHECK: [[TMP4:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0 // CHECK: ret i32 [[TMP4]] int32_t test_vqdmullh_lane_s16(int16_t a, int16x4_t b) { @@ -263,7 +263,7 @@ int32_t test_vqdmullh_lane_s16(int16_t a, int16x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 -// CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 [[VGET_LANE]]) #2 +// CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 [[VGET_LANE]]) // CHECK: ret i64 [[VQDMULLS_S32_I]] int64_t test_vqdmulls_lane_s32(int32_t a, int32x2_t b) { return vqdmulls_lane_s32(a, b, 1); @@ -275,7 +275,7 @@ int64_t test_vqdmulls_lane_s32(int32_t a, int32x2_t b) { // CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0 -// CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) #2 +// CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) // CHECK: [[TMP4:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0 // CHECK: ret i32 [[TMP4]] int32_t test_vqdmullh_laneq_s16(int16_t a, int16x8_t b) { @@ -286,7 +286,7 @@ int32_t test_vqdmullh_laneq_s16(int16_t a, int16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 -// CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 [[VGETQ_LANE]]) #2 +// CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 [[VGETQ_LANE]]) // CHECK: ret i64 [[VQDMULLS_S32_I]] int64_t test_vqdmulls_laneq_s32(int32_t a, int32x4_t b) { return vqdmulls_laneq_s32(a, b, 3); @@ -298,7 +298,7 @@ int64_t test_vqdmulls_laneq_s32(int32_t a, int32x4_t b) { // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0 -// CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) #2 +// CHECK: 
[[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) // CHECK: [[TMP4:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0 // CHECK: ret i16 [[TMP4]] int16_t test_vqdmulhh_lane_s16(int16_t a, int16x4_t b) { @@ -309,7 +309,7 @@ int16_t test_vqdmulhh_lane_s16(int16_t a, int16x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 -// CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 [[VGET_LANE]]) #2 +// CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 [[VGET_LANE]]) // CHECK: ret i32 [[VQDMULHS_S32_I]] int32_t test_vqdmulhs_lane_s32(int32_t a, int32x2_t b) { return vqdmulhs_lane_s32(a, b, 1); @@ -322,7 +322,7 @@ int32_t test_vqdmulhs_lane_s32(int32_t a, int32x2_t b) { // CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0 -// CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) #2 +// CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) // CHECK: [[TMP4:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0 // CHECK: ret i16 [[TMP4]] int16_t test_vqdmulhh_laneq_s16(int16_t a, int16x8_t b) { @@ -334,7 +334,7 @@ int16_t test_vqdmulhh_laneq_s16(int16_t a, int16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 -// CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 [[VGETQ_LANE]]) #2 +// CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 [[VGETQ_LANE]]) // CHECK: ret i32 [[VQDMULHS_S32_I]] int32_t test_vqdmulhs_laneq_s32(int32_t a, int32x4_t b) { return vqdmulhs_laneq_s32(a, b, 3); @@ -346,7 +346,7 @@ int32_t test_vqdmulhs_laneq_s32(int32_t a, int32x4_t b) { // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0 -// CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) #2 +// CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) // CHECK: [[TMP4:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0 // CHECK: ret i16 [[TMP4]] int16_t test_vqrdmulhh_lane_s16(int16_t a, int16x4_t b) { @@ -357,7 +357,7 @@ int16_t test_vqrdmulhh_lane_s16(int16_t a, int16x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 -// CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 [[VGET_LANE]]) #2 +// CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 [[VGET_LANE]]) // CHECK: ret i32 [[VQRDMULHS_S32_I]] int32_t test_vqrdmulhs_lane_s32(int32_t a, int32x2_t b) { return vqrdmulhs_lane_s32(a, b, 1); @@ -370,7 
+370,7 @@ int32_t test_vqrdmulhs_lane_s32(int32_t a, int32x2_t b) { // CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0 -// CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) #2 +// CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) // CHECK: [[TMP4:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0 // CHECK: ret i16 [[TMP4]] int16_t test_vqrdmulhh_laneq_s16(int16_t a, int16x8_t b) { @@ -382,7 +382,7 @@ int16_t test_vqrdmulhh_laneq_s16(int16_t a, int16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 -// CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 [[VGETQ_LANE]]) #2 +// CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 [[VGETQ_LANE]]) // CHECK: ret i32 [[VQRDMULHS_S32_I]] int32_t test_vqrdmulhs_laneq_s32(int32_t a, int32x4_t b) { return vqrdmulhs_laneq_s32(a, b, 3); @@ -497,7 +497,7 @@ int64_t test_vqdmlsls_laneq_s32(int64_t a, int32_t b, int32x4_t c) { // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP1]] to <8 x i8> // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> // CHECK: [[VGET_LANE7:%.*]] = extractelement <1 x double> [[TMP5]], i32 0 -// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE7]]) #2 +// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE7]]) // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP0]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> // CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP7]], double [[VMULXD_F64_I]], i32 0 @@ -523,7 +523,7 @@ float64x1_t test_vmulx_lane_f64_0() { // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[SHUFFLE_I]] to <16 x i8> // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 -// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]]) #2 +// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]]) // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP0]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> // CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP7]], double [[VMULXD_F64_I]], i32 0 diff --git a/test/CodeGen/arm_neon_intrinsics.c b/test/CodeGen/arm_neon_intrinsics.c index ad8587b0db2a4..a8b03b5d9b0bf 100644 --- a/test/CodeGen/arm_neon_intrinsics.c +++ b/test/CodeGen/arm_neon_intrinsics.c @@ -7,7 +7,7 @@ #include <arm_neon.h> // CHECK-LABEL: @test_vaba_s8( -// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c) #4 +// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c) // CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_V_I_I]] // CHECK: ret <8 x i8> [[ADD_I]] int8x8_t test_vaba_s8(int8x8_t a, int8x8_t b, int8x8_t c) { @@ -17,7 +17,7 @@ int8x8_t test_vaba_s8(int8x8_t a, int8x8_t b, int8x8_t c) { //
CHECK-LABEL: @test_vaba_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c) #4 +// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c) // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8> // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[VABD_V2_I_I]] // CHECK: ret <4 x i16> [[ADD_I]] @@ -28,7 +28,7 @@ int16x4_t test_vaba_s16(int16x4_t a, int16x4_t b, int16x4_t c) { // CHECK-LABEL: @test_vaba_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c) #4 +// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c) // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8> // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[VABD_V2_I_I]] // CHECK: ret <2 x i32> [[ADD_I]] @@ -37,7 +37,7 @@ int32x2_t test_vaba_s32(int32x2_t a, int32x2_t b, int32x2_t c) { } // CHECK-LABEL: @test_vaba_u8( -// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c) #4 +// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c) // CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_V_I_I]] // CHECK: ret <8 x i8> [[ADD_I]] uint8x8_t test_vaba_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) { @@ -47,7 +47,7 @@ uint8x8_t test_vaba_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) { // CHECK-LABEL: @test_vaba_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c) #4 +// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c) // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8> // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[VABD_V2_I_I]] // CHECK: ret <4 x i16> [[ADD_I]] @@ -58,7 +58,7 @@ uint16x4_t test_vaba_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) { // CHECK-LABEL: @test_vaba_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c) #4 +// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c) // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8> // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[VABD_V2_I_I]] // CHECK: ret <2 x i32> [[ADD_I]] @@ -67,7 +67,7 @@ uint32x2_t test_vaba_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) { } // CHECK-LABEL: @test_vabaq_s8( -// CHECK: [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %b, <16 x i8> %c) #4 +// CHECK: [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %b, <16 x i8> %c) // CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[VABDQ_V_I_I]] // CHECK: ret <16 x i8> [[ADD_I]] int8x16_t test_vabaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) { @@ -77,7 +77,7 @@ int8x16_t test_vabaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) { // CHECK-LABEL: @test_vabaq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x 
i8> -// CHECK: [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %b, <8 x i16> %c) #4 +// CHECK: [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %b, <8 x i16> %c) // CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I_I]] to <16 x i8> // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VABDQ_V2_I_I]] // CHECK: ret <8 x i16> [[ADD_I]] @@ -88,7 +88,7 @@ int16x8_t test_vabaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) { // CHECK-LABEL: @test_vabaq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8> -// CHECK: [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %b, <4 x i32> %c) #4 +// CHECK: [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %b, <4 x i32> %c) // CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I_I]] to <16 x i8> // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VABDQ_V2_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] @@ -97,7 +97,7 @@ int32x4_t test_vabaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { } // CHECK-LABEL: @test_vabaq_u8( -// CHECK: [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %b, <16 x i8> %c) #4 +// CHECK: [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %b, <16 x i8> %c) // CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[VABDQ_V_I_I]] // CHECK: ret <16 x i8> [[ADD_I]] uint8x16_t test_vabaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { @@ -107,7 +107,7 @@ uint8x16_t test_vabaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { // CHECK-LABEL: @test_vabaq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8> -// CHECK: [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %b, <8 x i16> %c) #4 +// CHECK: [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %b, <8 x i16> %c) // CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I_I]] to <16 x i8> // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VABDQ_V2_I_I]] // CHECK: ret <8 x i16> [[ADD_I]] @@ -118,7 +118,7 @@ uint16x8_t test_vabaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) { // CHECK-LABEL: @test_vabaq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8> -// CHECK: [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %b, <4 x i32> %c) #4 +// CHECK: [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %b, <4 x i32> %c) // CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I_I]] to <16 x i8> // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VABDQ_V2_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] @@ -127,7 +127,7 @@ uint32x4_t test_vabaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) { } // CHECK-LABEL: @test_vabal_s8( -// CHECK: [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c) #4 +// CHECK: [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c) // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I_I]] to <8 x i16> // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]] // CHECK: ret <8 x i16> [[ADD_I]] @@ -138,7 +138,7 @@ int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) { // CHECK-LABEL: @test_vabal_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VABD_V2_I_I_I:%.*]] = call <4 x i16> 
@llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c) #4 +// CHECK: [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c) // CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I_I]] to <4 x i32> @@ -151,7 +151,7 @@ int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { // CHECK-LABEL: @test_vabal_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c) #4 +// CHECK: [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c) // CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I_I]] to <2 x i64> @@ -162,7 +162,7 @@ int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { } // CHECK-LABEL: @test_vabal_u8( -// CHECK: [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c) #4 +// CHECK: [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c) // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I_I]] to <8 x i16> // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]] // CHECK: ret <8 x i16> [[ADD_I]] @@ -173,7 +173,7 @@ uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { // CHECK-LABEL: @test_vabal_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c) #4 +// CHECK: [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c) // CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I_I]] to <4 x i32> @@ -186,7 +186,7 @@ uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { // CHECK-LABEL: @test_vabal_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c) #4 +// CHECK: [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c) // CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I_I]] to <2 x i64> @@ -197,7 +197,7 @@ uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { } // CHECK-LABEL: @test_vabd_s8( -// CHECK: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VABD_V_I]] int8x8_t test_vabd_s8(int8x8_t a, int8x8_t b) { return vabd_s8(a, b); @@ -206,7 +206,7 @@ int8x8_t test_vabd_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vabd_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 
x i16> %b to <8 x i8> -// CHECK: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VABD_V2_I]] int16x4_t test_vabd_s16(int16x4_t a, int16x4_t b) { @@ -216,7 +216,7 @@ int16x4_t test_vabd_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vabd_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VABD_V2_I]] int32x2_t test_vabd_s32(int32x2_t a, int32x2_t b) { @@ -224,7 +224,7 @@ int32x2_t test_vabd_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: @test_vabd_u8( -// CHECK: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VABD_V_I]] uint8x8_t test_vabd_u8(uint8x8_t a, uint8x8_t b) { return vabd_u8(a, b); @@ -233,7 +233,7 @@ uint8x8_t test_vabd_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vabd_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VABD_V2_I]] uint16x4_t test_vabd_u16(uint16x4_t a, uint16x4_t b) { @@ -243,7 +243,7 @@ uint16x4_t test_vabd_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vabd_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VABD_V2_I]] uint32x2_t test_vabd_u32(uint32x2_t a, uint32x2_t b) { @@ -253,7 +253,7 @@ uint32x2_t test_vabd_u32(uint32x2_t a, uint32x2_t b) { // CHECK-LABEL: @test_vabd_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VABD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %a, <2 x float> %b) #4 +// CHECK: [[VABD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %a, <2 x float> %b) // CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x float> [[VABD_V2_I]] to <8 x i8> // CHECK: ret <2 x float> [[VABD_V2_I]] float32x2_t test_vabd_f32(float32x2_t a, float32x2_t b) { @@ -261,7 +261,7 @@ float32x2_t test_vabd_f32(float32x2_t a, float32x2_t b) { } // CHECK-LABEL: @test_vabdq_s8( -// CHECK: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VABDQ_V_I]] 
int8x16_t test_vabdq_s8(int8x16_t a, int8x16_t b) { return vabdq_s8(a, b); @@ -270,7 +270,7 @@ int8x16_t test_vabdq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vabdq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VABDQ_V2_I]] int16x8_t test_vabdq_s16(int16x8_t a, int16x8_t b) { @@ -280,7 +280,7 @@ int16x8_t test_vabdq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vabdq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VABDQ_V2_I]] int32x4_t test_vabdq_s32(int32x4_t a, int32x4_t b) { @@ -288,7 +288,7 @@ int32x4_t test_vabdq_s32(int32x4_t a, int32x4_t b) { } // CHECK-LABEL: @test_vabdq_u8( -// CHECK: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VABDQ_V_I]] uint8x16_t test_vabdq_u8(uint8x16_t a, uint8x16_t b) { return vabdq_u8(a, b); @@ -297,7 +297,7 @@ uint8x16_t test_vabdq_u8(uint8x16_t a, uint8x16_t b) { // CHECK-LABEL: @test_vabdq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VABDQ_V2_I]] uint16x8_t test_vabdq_u16(uint16x8_t a, uint16x8_t b) { @@ -307,7 +307,7 @@ uint16x8_t test_vabdq_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vabdq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VABDQ_V2_I]] uint32x4_t test_vabdq_u32(uint32x4_t a, uint32x4_t b) { @@ -317,7 +317,7 @@ uint32x4_t test_vabdq_u32(uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vabdq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %a, <4 x float> %b) #4 +// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %a, <4 x float> %b) // CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x float> [[VABDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x float> [[VABDQ_V2_I]] float32x4_t test_vabdq_f32(float32x4_t a, float32x4_t b) { @@ -325,7 
+325,7 @@ float32x4_t test_vabdq_f32(float32x4_t a, float32x4_t b) { } // CHECK-LABEL: @test_vabdl_s8( -// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16> // CHECK: ret <8 x i16> [[VMOVL_I_I]] int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) { @@ -335,7 +335,7 @@ int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vabdl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I]] to <4 x i32> @@ -347,7 +347,7 @@ int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vabdl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I]] to <2 x i64> @@ -357,7 +357,7 @@ int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: @test_vabdl_u8( -// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16> // CHECK: ret <8 x i16> [[VMOVL_I_I]] uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) { @@ -367,7 +367,7 @@ uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vabdl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I]] to <4 x i32> @@ -379,7 +379,7 @@ uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vabdl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8> // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I]] to <2 x i64> @@ 
-389,7 +389,7 @@ uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) { } // CHECK-LABEL: @test_vabs_s8( -// CHECK: [[VABS_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %a) #4 +// CHECK: [[VABS_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %a) // CHECK: ret <8 x i8> [[VABS_I]] int8x8_t test_vabs_s8(int8x8_t a) { return vabs_s8(a); @@ -397,7 +397,7 @@ int8x8_t test_vabs_s8(int8x8_t a) { // CHECK-LABEL: @test_vabs_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VABS1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %a) #4 +// CHECK: [[VABS1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %a) // CHECK: ret <4 x i16> [[VABS1_I]] int16x4_t test_vabs_s16(int16x4_t a) { return vabs_s16(a); @@ -405,7 +405,7 @@ int16x4_t test_vabs_s16(int16x4_t a) { // CHECK-LABEL: @test_vabs_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VABS1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %a) #4 +// CHECK: [[VABS1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %a) // CHECK: ret <2 x i32> [[VABS1_I]] int32x2_t test_vabs_s32(int32x2_t a) { return vabs_s32(a); @@ -413,14 +413,14 @@ int32x2_t test_vabs_s32(int32x2_t a) { // CHECK-LABEL: @test_vabs_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) #4 +// CHECK: [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) // CHECK: ret <2 x float> [[VABS1_I]] float32x2_t test_vabs_f32(float32x2_t a) { return vabs_f32(a); } // CHECK-LABEL: @test_vabsq_s8( -// CHECK: [[VABS_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %a) #4 +// CHECK: [[VABS_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %a) // CHECK: ret <16 x i8> [[VABS_I]] int8x16_t test_vabsq_s8(int8x16_t a) { return vabsq_s8(a); @@ -428,7 +428,7 @@ int8x16_t test_vabsq_s8(int8x16_t a) { // CHECK-LABEL: @test_vabsq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VABS1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %a) #4 +// CHECK: [[VABS1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %a) // CHECK: ret <8 x i16> [[VABS1_I]] int16x8_t test_vabsq_s16(int16x8_t a) { return vabsq_s16(a); @@ -436,7 +436,7 @@ int16x8_t test_vabsq_s16(int16x8_t a) { // CHECK-LABEL: @test_vabsq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VABS1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %a) #4 +// CHECK: [[VABS1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %a) // CHECK: ret <4 x i32> [[VABS1_I]] int32x4_t test_vabsq_s32(int32x4_t a) { return vabsq_s32(a); @@ -444,7 +444,7 @@ int32x4_t test_vabsq_s32(int32x4_t a) { // CHECK-LABEL: @test_vabsq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) #4 +// CHECK: [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) // CHECK: ret <4 x float> [[VABS1_I]] float32x4_t test_vabsq_f32(float32x4_t a) { return vabsq_f32(a); @@ -997,7 +997,7 @@ uint64x2_t test_vbicq_u64(uint64x2_t a, uint64x2_t b) { } // CHECK-LABEL: @test_vbsl_s8( -// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4 +// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) // CHECK: ret <8 x i8> [[VBSL_V_I]] int8x8_t test_vbsl_s8(uint8x8_t a, 
int8x8_t b, int8x8_t c) { return vbsl_s8(a, b, c); @@ -1007,7 +1007,7 @@ int8x8_t test_vbsl_s8(uint8x8_t a, int8x8_t b, int8x8_t c) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4 +// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16> // CHECK: ret <4 x i16> [[TMP3]] int16x4_t test_vbsl_s16(uint16x4_t a, int16x4_t b, int16x4_t c) { @@ -1018,7 +1018,7 @@ int16x4_t test_vbsl_s16(uint16x4_t a, int16x4_t b, int16x4_t c) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4 +// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x i32> // CHECK: ret <2 x i32> [[TMP3]] int32x2_t test_vbsl_s32(uint32x2_t a, int32x2_t b, int32x2_t c) { @@ -1029,7 +1029,7 @@ int32x2_t test_vbsl_s32(uint32x2_t a, int32x2_t b, int32x2_t c) { // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8> -// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4 +// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <1 x i64> // CHECK: ret <1 x i64> [[TMP3]] int64x1_t test_vbsl_s64(uint64x1_t a, int64x1_t b, int64x1_t c) { @@ -1037,7 +1037,7 @@ int64x1_t test_vbsl_s64(uint64x1_t a, int64x1_t b, int64x1_t c) { } // CHECK-LABEL: @test_vbsl_u8( -// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4 +// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) // CHECK: ret <8 x i8> [[VBSL_V_I]] uint8x8_t test_vbsl_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) { return vbsl_u8(a, b, c); @@ -1047,7 +1047,7 @@ uint8x8_t test_vbsl_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4 +// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16> // CHECK: ret <4 x i16> [[TMP3]] uint16x4_t test_vbsl_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) { @@ -1058,7 +1058,7 @@ uint16x4_t test_vbsl_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VBSL_V_I:%.*]] = call <8 x 
i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4 +// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x i32> // CHECK: ret <2 x i32> [[TMP3]] uint32x2_t test_vbsl_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) { @@ -1069,7 +1069,7 @@ uint32x2_t test_vbsl_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) { // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8> -// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4 +// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <1 x i64> // CHECK: ret <1 x i64> [[TMP3]] uint64x1_t test_vbsl_u64(uint64x1_t a, uint64x1_t b, uint64x1_t c) { @@ -1080,7 +1080,7 @@ uint64x1_t test_vbsl_u64(uint64x1_t a, uint64x1_t b, uint64x1_t c) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8> -// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4 +// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x float> // CHECK: ret <2 x float> [[TMP3]] float32x2_t test_vbsl_f32(uint32x2_t a, float32x2_t b, float32x2_t c) { @@ -1088,7 +1088,7 @@ float32x2_t test_vbsl_f32(uint32x2_t a, float32x2_t b, float32x2_t c) { } // CHECK-LABEL: @test_vbsl_p8( -// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4 +// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) // CHECK: ret <8 x i8> [[VBSL_V_I]] poly8x8_t test_vbsl_p8(uint8x8_t a, poly8x8_t b, poly8x8_t c) { return vbsl_p8(a, b, c); @@ -1098,7 +1098,7 @@ poly8x8_t test_vbsl_p8(uint8x8_t a, poly8x8_t b, poly8x8_t c) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4 +// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16> // CHECK: ret <4 x i16> [[TMP3]] poly16x4_t test_vbsl_p16(uint16x4_t a, poly16x4_t b, poly16x4_t c) { @@ -1106,7 +1106,7 @@ poly16x4_t test_vbsl_p16(uint16x4_t a, poly16x4_t b, poly16x4_t c) { } // CHECK-LABEL: @test_vbslq_s8( -// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #4 +// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) // CHECK: ret <16 x i8> [[VBSLQ_V_I]] int8x16_t test_vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c) { return vbslq_s8(a, b, c); @@ -1116,7 +1116,7 @@ int8x16_t test_vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to 
<16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8> -// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 +// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16> // CHECK: ret <8 x i16> [[TMP3]] int16x8_t test_vbslq_s16(uint16x8_t a, int16x8_t b, int16x8_t c) { @@ -1127,7 +1127,7 @@ int16x8_t test_vbslq_s16(uint16x8_t a, int16x8_t b, int16x8_t c) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8> -// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 +// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32> // CHECK: ret <4 x i32> [[TMP3]] int32x4_t test_vbslq_s32(uint32x4_t a, int32x4_t b, int32x4_t c) { @@ -1138,7 +1138,7 @@ int32x4_t test_vbslq_s32(uint32x4_t a, int32x4_t b, int32x4_t c) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8> -// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 +// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <2 x i64> // CHECK: ret <2 x i64> [[TMP3]] int64x2_t test_vbslq_s64(uint64x2_t a, int64x2_t b, int64x2_t c) { @@ -1146,7 +1146,7 @@ int64x2_t test_vbslq_s64(uint64x2_t a, int64x2_t b, int64x2_t c) { } // CHECK-LABEL: @test_vbslq_u8( -// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #4 +// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) // CHECK: ret <16 x i8> [[VBSLQ_V_I]] uint8x16_t test_vbslq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { return vbslq_u8(a, b, c); @@ -1156,7 +1156,7 @@ uint8x16_t test_vbslq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8> -// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 +// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16> // CHECK: ret <8 x i16> [[TMP3]] uint16x8_t test_vbslq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) { @@ -1167,7 +1167,7 @@ uint16x8_t test_vbslq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8> -// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x 
i8> [[TMP2]]) #4 +// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32> // CHECK: ret <4 x i32> [[TMP3]] uint32x4_t test_vbslq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) { @@ -1178,7 +1178,7 @@ uint32x4_t test_vbslq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8> -// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 +// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <2 x i64> // CHECK: ret <2 x i64> [[TMP3]] uint64x2_t test_vbslq_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) { @@ -1189,7 +1189,7 @@ uint64x2_t test_vbslq_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8> -// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 +// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x float> // CHECK: ret <4 x float> [[TMP3]] float32x4_t test_vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c) { @@ -1197,7 +1197,7 @@ float32x4_t test_vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c) { } // CHECK-LABEL: @test_vbslq_p8( -// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #4 +// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) // CHECK: ret <16 x i8> [[VBSLQ_V_I]] poly8x16_t test_vbslq_p8(uint8x16_t a, poly8x16_t b, poly8x16_t c) { return vbslq_p8(a, b, c); @@ -1207,7 +1207,7 @@ poly8x16_t test_vbslq_p8(uint8x16_t a, poly8x16_t b, poly8x16_t c) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8> -// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 +// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16> // CHECK: ret <8 x i16> [[TMP3]] poly16x8_t test_vbslq_p16(uint16x8_t a, poly16x8_t b, poly16x8_t c) { @@ -1217,7 +1217,7 @@ poly16x8_t test_vbslq_p16(uint16x8_t a, poly16x8_t b, poly16x8_t c) { // CHECK-LABEL: @test_vcage_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %a, <2 x float> %b) #4 +// CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %a, <2 x float> %b) // CHECK: ret <2 x i32> [[VCAGE_V2_I]] uint32x2_t test_vcage_f32(float32x2_t a, float32x2_t b) { return vcage_f32(a, b); @@ 
-1226,7 +1226,7 @@ uint32x2_t test_vcage_f32(float32x2_t a, float32x2_t b) { // CHECK-LABEL: @test_vcageq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %a, <4 x float> %b) #4 +// CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %a, <4 x float> %b) // CHECK: ret <4 x i32> [[VCAGEQ_V2_I]] uint32x4_t test_vcageq_f32(float32x4_t a, float32x4_t b) { return vcageq_f32(a, b); @@ -1235,7 +1235,7 @@ uint32x4_t test_vcageq_f32(float32x4_t a, float32x4_t b) { // CHECK-LABEL: @test_vcagt_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %a, <2 x float> %b) #4 +// CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %a, <2 x float> %b) // CHECK: ret <2 x i32> [[VCAGT_V2_I]] uint32x2_t test_vcagt_f32(float32x2_t a, float32x2_t b) { return vcagt_f32(a, b); @@ -1244,7 +1244,7 @@ uint32x2_t test_vcagt_f32(float32x2_t a, float32x2_t b) { // CHECK-LABEL: @test_vcagtq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %a, <4 x float> %b) #4 +// CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %a, <4 x float> %b) // CHECK: ret <4 x i32> [[VCAGTQ_V2_I]] uint32x4_t test_vcagtq_f32(float32x4_t a, float32x4_t b) { return vcagtq_f32(a, b); @@ -1253,7 +1253,7 @@ uint32x4_t test_vcagtq_f32(float32x4_t a, float32x4_t b) { // CHECK-LABEL: @test_vcale_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %b, <2 x float> %a) #4 +// CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %b, <2 x float> %a) // CHECK: ret <2 x i32> [[VCALE_V2_I]] uint32x2_t test_vcale_f32(float32x2_t a, float32x2_t b) { return vcale_f32(a, b); @@ -1262,7 +1262,7 @@ uint32x2_t test_vcale_f32(float32x2_t a, float32x2_t b) { // CHECK-LABEL: @test_vcaleq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %b, <4 x float> %a) #4 +// CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %b, <4 x float> %a) // CHECK: ret <4 x i32> [[VCALEQ_V2_I]] uint32x4_t test_vcaleq_f32(float32x4_t a, float32x4_t b) { return vcaleq_f32(a, b); @@ -1271,7 +1271,7 @@ uint32x4_t test_vcaleq_f32(float32x4_t a, float32x4_t b) { // CHECK-LABEL: @test_vcalt_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %b, <2 x float> %a) #4 +// CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %b, <2 x float> %a) // CHECK: ret <2 x i32> [[VCALT_V2_I]] uint32x2_t test_vcalt_f32(float32x2_t a, float32x2_t b) { return vcalt_f32(a, b); @@ -1280,7 +1280,7 @@ uint32x2_t 
test_vcalt_f32(float32x2_t a, float32x2_t b) { // CHECK-LABEL: @test_vcaltq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %b, <4 x float> %a) #4 +// CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %b, <4 x float> %a) // CHECK: ret <4 x i32> [[VCALTQ_V2_I]] uint32x4_t test_vcaltq_f32(float32x4_t a, float32x4_t b) { return vcaltq_f32(a, b); @@ -1751,7 +1751,7 @@ uint32x4_t test_vcleq_u32(uint32x4_t a, uint32x4_t b) { } // CHECK-LABEL: @test_vcls_s8( -// CHECK: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a) #4 +// CHECK: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a) // CHECK: ret <8 x i8> [[VCLS_V_I]] int8x8_t test_vcls_s8(int8x8_t a) { return vcls_s8(a); @@ -1759,7 +1759,7 @@ int8x8_t test_vcls_s8(int8x8_t a) { // CHECK-LABEL: @test_vcls_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %a) #4 +// CHECK: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %a) // CHECK: [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VCLS_V1_I]] int16x4_t test_vcls_s16(int16x4_t a) { @@ -1768,7 +1768,7 @@ int16x4_t test_vcls_s16(int16x4_t a) { // CHECK-LABEL: @test_vcls_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %a) #4 +// CHECK: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %a) // CHECK: [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VCLS_V1_I]] int32x2_t test_vcls_s32(int32x2_t a) { @@ -1776,7 +1776,7 @@ int32x2_t test_vcls_s32(int32x2_t a) { } // CHECK-LABEL: @test_vclsq_s8( -// CHECK: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a) #4 +// CHECK: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a) // CHECK: ret <16 x i8> [[VCLSQ_V_I]] int8x16_t test_vclsq_s8(int8x16_t a) { return vclsq_s8(a); @@ -1784,7 +1784,7 @@ int8x16_t test_vclsq_s8(int8x16_t a) { // CHECK-LABEL: @test_vclsq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %a) #4 +// CHECK: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %a) // CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VCLSQ_V1_I]] int16x8_t test_vclsq_s16(int16x8_t a) { @@ -1793,7 +1793,7 @@ int16x8_t test_vclsq_s16(int16x8_t a) { // CHECK-LABEL: @test_vclsq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %a) #4 +// CHECK: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %a) // CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VCLSQ_V1_I]] int32x4_t test_vclsq_s32(int32x4_t a) { @@ -1913,7 +1913,7 @@ uint32x4_t test_vcltq_u32(uint32x4_t a, uint32x4_t b) { } // CHECK-LABEL: @test_vclz_s8( -// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #4 +// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) // CHECK: ret <8 x i8> [[VCLZ_V_I]] int8x8_t 
test_vclz_s8(int8x8_t a) { return vclz_s8(a); @@ -1921,7 +1921,7 @@ int8x8_t test_vclz_s8(int8x8_t a) { // CHECK-LABEL: @test_vclz_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) #4 +// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VCLZ_V1_I]] int16x4_t test_vclz_s16(int16x4_t a) { @@ -1930,7 +1930,7 @@ int16x4_t test_vclz_s16(int16x4_t a) { // CHECK-LABEL: @test_vclz_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) #4 +// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VCLZ_V1_I]] int32x2_t test_vclz_s32(int32x2_t a) { @@ -1938,7 +1938,7 @@ int32x2_t test_vclz_s32(int32x2_t a) { } // CHECK-LABEL: @test_vclz_u8( -// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #4 +// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) // CHECK: ret <8 x i8> [[VCLZ_V_I]] uint8x8_t test_vclz_u8(uint8x8_t a) { return vclz_u8(a); @@ -1946,7 +1946,7 @@ uint8x8_t test_vclz_u8(uint8x8_t a) { // CHECK-LABEL: @test_vclz_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) #4 +// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VCLZ_V1_I]] uint16x4_t test_vclz_u16(uint16x4_t a) { @@ -1955,7 +1955,7 @@ uint16x4_t test_vclz_u16(uint16x4_t a) { // CHECK-LABEL: @test_vclz_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) #4 +// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VCLZ_V1_I]] uint32x2_t test_vclz_u32(uint32x2_t a) { @@ -1963,7 +1963,7 @@ uint32x2_t test_vclz_u32(uint32x2_t a) { } // CHECK-LABEL: @test_vclzq_s8( -// CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #4 +// CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) // CHECK: ret <16 x i8> [[VCLZQ_V_I]] int8x16_t test_vclzq_s8(int8x16_t a) { return vclzq_s8(a); @@ -1971,7 +1971,7 @@ int8x16_t test_vclzq_s8(int8x16_t a) { // CHECK-LABEL: @test_vclzq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) #4 +// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) // CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VCLZQ_V1_I]] int16x8_t test_vclzq_s16(int16x8_t a) { @@ -1980,7 +1980,7 @@ int16x8_t test_vclzq_s16(int16x8_t a) { // CHECK-LABEL: @test_vclzq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) #4 +// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) // CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 
x i32> [[VCLZQ_V1_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VCLZQ_V1_I]] int32x4_t test_vclzq_s32(int32x4_t a) { @@ -1988,7 +1988,7 @@ int32x4_t test_vclzq_s32(int32x4_t a) { } // CHECK-LABEL: @test_vclzq_u8( -// CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #4 +// CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) // CHECK: ret <16 x i8> [[VCLZQ_V_I]] uint8x16_t test_vclzq_u8(uint8x16_t a) { return vclzq_u8(a); @@ -1996,7 +1996,7 @@ uint8x16_t test_vclzq_u8(uint8x16_t a) { // CHECK-LABEL: @test_vclzq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) #4 +// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) // CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VCLZQ_V1_I]] uint16x8_t test_vclzq_u16(uint16x8_t a) { @@ -2005,7 +2005,7 @@ uint16x8_t test_vclzq_u16(uint16x8_t a) { // CHECK-LABEL: @test_vclzq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) #4 +// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) // CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VCLZQ_V1_I]] uint32x4_t test_vclzq_u32(uint32x4_t a) { @@ -2013,42 +2013,42 @@ uint32x4_t test_vclzq_u32(uint32x4_t a) { } // CHECK-LABEL: @test_vcnt_u8( -// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4 +// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) // CHECK: ret <8 x i8> [[VCNT_V_I]] uint8x8_t test_vcnt_u8(uint8x8_t a) { return vcnt_u8(a); } // CHECK-LABEL: @test_vcnt_s8( -// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4 +// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) // CHECK: ret <8 x i8> [[VCNT_V_I]] int8x8_t test_vcnt_s8(int8x8_t a) { return vcnt_s8(a); } // CHECK-LABEL: @test_vcnt_p8( -// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4 +// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) // CHECK: ret <8 x i8> [[VCNT_V_I]] poly8x8_t test_vcnt_p8(poly8x8_t a) { return vcnt_p8(a); } // CHECK-LABEL: @test_vcntq_u8( -// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4 +// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) // CHECK: ret <16 x i8> [[VCNTQ_V_I]] uint8x16_t test_vcntq_u8(uint8x16_t a) { return vcntq_u8(a); } // CHECK-LABEL: @test_vcntq_s8( -// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4 +// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) // CHECK: ret <16 x i8> [[VCNTQ_V_I]] int8x16_t test_vcntq_s8(int8x16_t a) { return vcntq_s8(a); } // CHECK-LABEL: @test_vcntq_p8( -// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4 +// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) // CHECK: ret <16 x i8> [[VCNTQ_V_I]] poly8x16_t test_vcntq_p8(poly8x16_t a) { return vcntq_p8(a); @@ -2140,7 +2140,7 @@ poly16x8_t test_vcombine_p16(poly16x4_t a, poly16x4_t b) { // CHECK-LABEL: @test_vcreate_s8( // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8> -// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false) #4 +// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> 
@llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false) // CHECK: ret <8 x i8> [[VCLZ_V_I]] int8x8_t test_vcreate_s8(uint64_t a) { return vclz_s8(vcreate_s8(a)); @@ -2149,7 +2149,7 @@ int8x8_t test_vcreate_s8(uint64_t a) { // CHECK-LABEL: @test_vcreate_s16( // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> -// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[TMP0]], i1 false) #4 +// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[TMP0]], i1 false) // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VCLZ_V1_I]] int16x4_t test_vcreate_s16(uint64_t a) { @@ -2159,7 +2159,7 @@ int16x4_t test_vcreate_s16(uint64_t a) { // CHECK-LABEL: @test_vcreate_s32( // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x i32> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> -// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP0]], i1 false) #4 +// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP0]], i1 false) // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VCLZ_V1_I]] int32x2_t test_vcreate_s32(uint64_t a) { @@ -2182,7 +2182,7 @@ float32x2_t test_vcreate_f32(uint64_t a) { // CHECK-LABEL: @test_vcreate_u8( // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8> -// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false) #4 +// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false) // CHECK: ret <8 x i8> [[VCLZ_V_I]] uint8x8_t test_vcreate_u8(uint64_t a) { return vclz_s8(vcreate_u8(a)); @@ -2191,7 +2191,7 @@ uint8x8_t test_vcreate_u8(uint64_t a) { // CHECK-LABEL: @test_vcreate_u16( // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> -// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[TMP0]], i1 false) #4 +// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[TMP0]], i1 false) // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VCLZ_V1_I]] uint16x4_t test_vcreate_u16(uint64_t a) { @@ -2201,7 +2201,7 @@ uint16x4_t test_vcreate_u16(uint64_t a) { // CHECK-LABEL: @test_vcreate_u32( // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x i32> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> -// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP0]], i1 false) #4 +// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP0]], i1 false) // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VCLZ_V1_I]] uint32x2_t test_vcreate_u32(uint64_t a) { @@ -2219,7 +2219,7 @@ uint64x1_t test_vcreate_u64(uint64_t a) { // CHECK-LABEL: @test_vcreate_p8( // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8> -// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> [[TMP0]]) #4 +// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> [[TMP0]]) // CHECK: ret <8 x i8> [[VCNT_V_I]] poly8x8_t test_vcreate_p8(uint64_t a) { return vcnt_p8(vcreate_p8(a)); @@ -2230,7 +2230,7 @@ poly8x8_t test_vcreate_p8(uint64_t a) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> -// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> 
@llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]]) #4 +// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]]) // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16> // CHECK: ret <4 x i16> [[TMP4]] poly16x4_t test_vcreate_p16(uint64_t a) { @@ -2249,7 +2249,7 @@ int64x1_t test_vcreate_s64(uint64_t a) { // CHECK-LABEL: @test_vcvt_f16_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %a) #4 +// CHECK: [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %a) // CHECK: [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x half> // CHECK: ret <4 x half> [[TMP1]] @@ -2292,7 +2292,7 @@ float32x4_t test_vcvtq_f32_u32(uint32x4_t a) { // CHECK-LABEL: @test_vcvt_f32_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> // CHECK: [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]]) #4 +// CHECK: [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]]) // CHECK: [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I]] to <16 x i8> // CHECK: ret <4 x float> [[VCVT_F32_F161_I]] float32x4_t test_vcvt_f32_f16(float16x4_t a) { @@ -3189,7 +3189,7 @@ float32x4_t test_vextq_f32(float32x4_t a, float32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8> -// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %c, <2 x float> %a) #4 +// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %c, <2 x float> %a) // CHECK: ret <2 x float> [[TMP3]] float32x2_t test_vfma_f32(float32x2_t a, float32x2_t b, float32x2_t c) { return vfma_f32(a, b, c); @@ -3199,7 +3199,7 @@ float32x2_t test_vfma_f32(float32x2_t a, float32x2_t b, float32x2_t c) { // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8> -// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %c, <4 x float> %a) #4 +// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %c, <4 x float> %a) // CHECK: ret <4 x float> [[TMP3]] float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) { return vfmaq_f32(a, b, c); @@ -3210,7 +3210,7 @@ float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) { // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8> -// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %c, <2 x float> %a) #4 +// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %c, <2 x float> %a) // CHECK: ret <2 x float> [[TMP3]] float32x2_t test_vfms_f32(float32x2_t a, float32x2_t b, float32x2_t c) { return vfms_f32(a, b, c); @@ -3221,7 +3221,7 @@ float32x2_t test_vfms_f32(float32x2_t a, float32x2_t b, float32x2_t c) { // CHECK: [[TMP0:%.*]] = 
bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8> -// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %c, <4 x float> %a) #4 +// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %c, <4 x float> %a) // CHECK: ret <4 x float> [[TMP3]] float32x4_t test_vfmsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) { return vfmsq_f32(a, b, c); @@ -3618,7 +3618,7 @@ poly16x4_t test_vget_low_p16(poly16x8_t a) { } // CHECK-LABEL: @test_vhadd_s8( -// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VHADD_V_I]] int8x8_t test_vhadd_s8(int8x8_t a, int8x8_t b) { return vhadd_s8(a, b); @@ -3627,7 +3627,7 @@ int8x8_t test_vhadd_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vhadd_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VHADD_V2_I]] int16x4_t test_vhadd_s16(int16x4_t a, int16x4_t b) { @@ -3637,7 +3637,7 @@ int16x4_t test_vhadd_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vhadd_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VHADD_V2_I]] int32x2_t test_vhadd_s32(int32x2_t a, int32x2_t b) { @@ -3645,7 +3645,7 @@ int32x2_t test_vhadd_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: @test_vhadd_u8( -// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VHADD_V_I]] uint8x8_t test_vhadd_u8(uint8x8_t a, uint8x8_t b) { return vhadd_u8(a, b); @@ -3654,7 +3654,7 @@ uint8x8_t test_vhadd_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vhadd_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VHADD_V2_I]] uint16x4_t test_vhadd_u16(uint16x4_t a, uint16x4_t b) { @@ -3664,7 +3664,7 @@ uint16x4_t test_vhadd_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vhadd_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> 
@llvm.arm.neon.vhaddu.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VHADD_V2_I]] uint32x2_t test_vhadd_u32(uint32x2_t a, uint32x2_t b) { @@ -3672,7 +3672,7 @@ uint32x2_t test_vhadd_u32(uint32x2_t a, uint32x2_t b) { } // CHECK-LABEL: @test_vhaddq_s8( -// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VHADDQ_V_I]] int8x16_t test_vhaddq_s8(int8x16_t a, int8x16_t b) { return vhaddq_s8(a, b); @@ -3681,7 +3681,7 @@ int8x16_t test_vhaddq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vhaddq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VHADDQ_V2_I]] int16x8_t test_vhaddq_s16(int16x8_t a, int16x8_t b) { @@ -3691,7 +3691,7 @@ int16x8_t test_vhaddq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vhaddq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VHADDQ_V2_I]] int32x4_t test_vhaddq_s32(int32x4_t a, int32x4_t b) { @@ -3699,7 +3699,7 @@ int32x4_t test_vhaddq_s32(int32x4_t a, int32x4_t b) { } // CHECK-LABEL: @test_vhaddq_u8( -// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VHADDQ_V_I]] uint8x16_t test_vhaddq_u8(uint8x16_t a, uint8x16_t b) { return vhaddq_u8(a, b); @@ -3708,7 +3708,7 @@ uint8x16_t test_vhaddq_u8(uint8x16_t a, uint8x16_t b) { // CHECK-LABEL: @test_vhaddq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VHADDQ_V2_I]] uint16x8_t test_vhaddq_u16(uint16x8_t a, uint16x8_t b) { @@ -3718,7 +3718,7 @@ uint16x8_t test_vhaddq_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vhaddq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VHADDQ_V2_I]] uint32x4_t test_vhaddq_u32(uint32x4_t a, 
uint32x4_t b) { @@ -3726,7 +3726,7 @@ uint32x4_t test_vhaddq_u32(uint32x4_t a, uint32x4_t b) { } // CHECK-LABEL: @test_vhsub_s8( -// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VHSUB_V_I]] int8x8_t test_vhsub_s8(int8x8_t a, int8x8_t b) { return vhsub_s8(a, b); @@ -3735,7 +3735,7 @@ int8x8_t test_vhsub_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vhsub_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VHSUB_V2_I]] int16x4_t test_vhsub_s16(int16x4_t a, int16x4_t b) { @@ -3745,7 +3745,7 @@ int16x4_t test_vhsub_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vhsub_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VHSUB_V2_I]] int32x2_t test_vhsub_s32(int32x2_t a, int32x2_t b) { @@ -3753,7 +3753,7 @@ int32x2_t test_vhsub_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: @test_vhsub_u8( -// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VHSUB_V_I]] uint8x8_t test_vhsub_u8(uint8x8_t a, uint8x8_t b) { return vhsub_u8(a, b); @@ -3762,7 +3762,7 @@ uint8x8_t test_vhsub_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vhsub_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VHSUB_V2_I]] uint16x4_t test_vhsub_u16(uint16x4_t a, uint16x4_t b) { @@ -3772,7 +3772,7 @@ uint16x4_t test_vhsub_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vhsub_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VHSUB_V2_I]] uint32x2_t test_vhsub_u32(uint32x2_t a, uint32x2_t b) { @@ -3780,7 +3780,7 @@ uint32x2_t test_vhsub_u32(uint32x2_t a, uint32x2_t b) { } // CHECK-LABEL: @test_vhsubq_s8( -// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> 
@llvm.arm.neon.vhsubs.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VHSUBQ_V_I]] int8x16_t test_vhsubq_s8(int8x16_t a, int8x16_t b) { return vhsubq_s8(a, b); @@ -3789,7 +3789,7 @@ int8x16_t test_vhsubq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vhsubq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VHSUBQ_V2_I]] int16x8_t test_vhsubq_s16(int16x8_t a, int16x8_t b) { @@ -3799,7 +3799,7 @@ int16x8_t test_vhsubq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vhsubq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VHSUBQ_V2_I]] int32x4_t test_vhsubq_s32(int32x4_t a, int32x4_t b) { @@ -3807,7 +3807,7 @@ int32x4_t test_vhsubq_s32(int32x4_t a, int32x4_t b) { } // CHECK-LABEL: @test_vhsubq_u8( -// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VHSUBQ_V_I]] uint8x16_t test_vhsubq_u8(uint8x16_t a, uint8x16_t b) { return vhsubq_u8(a, b); @@ -3816,7 +3816,7 @@ uint8x16_t test_vhsubq_u8(uint8x16_t a, uint8x16_t b) { // CHECK-LABEL: @test_vhsubq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VHSUBQ_V2_I]] uint16x8_t test_vhsubq_u16(uint16x8_t a, uint16x8_t b) { @@ -3826,7 +3826,7 @@ uint16x8_t test_vhsubq_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vhsubq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VHSUBQ_V2_I]] uint32x4_t test_vhsubq_u32(uint32x4_t a, uint32x4_t b) { @@ -7002,7 +7002,7 @@ poly16x4x4_t test_vld4_lane_p16(poly16_t const * a, poly16x4x4_t b) { } // CHECK-LABEL: @test_vmax_s8( -// CHECK: [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VMAX_V_I]] int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) { return vmax_s8(a, b); @@ -7011,7 +7011,7 @@ int8x8_t test_vmax_s8(int8x8_t a, 
int8x8_t b) { // CHECK-LABEL: @test_vmax_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VMAX_V2_I]] int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) { @@ -7021,7 +7021,7 @@ int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vmax_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VMAX_V2_I]] int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) { @@ -7029,7 +7029,7 @@ int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: @test_vmax_u8( -// CHECK: [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VMAX_V_I]] uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) { return vmax_u8(a, b); @@ -7038,7 +7038,7 @@ uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vmax_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VMAX_V2_I]] uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) { @@ -7048,7 +7048,7 @@ uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vmax_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VMAX_V2_I]] uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) { @@ -7058,7 +7058,7 @@ uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) { // CHECK-LABEL: @test_vmax_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %a, <2 x float> %b) #4 +// CHECK: [[VMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %a, <2 x float> %b) // CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x float> [[VMAX_V2_I]] to <8 x i8> // CHECK: ret <2 x float> [[VMAX_V2_I]] float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) { @@ -7066,7 +7066,7 @@ float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) { } // CHECK-LABEL: @test_vmaxq_s8( -// CHECK: [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %a, <16 x i8> 
%b) #4 +// CHECK: [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VMAXQ_V_I]] int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) { return vmaxq_s8(a, b); @@ -7075,7 +7075,7 @@ int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vmaxq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VMAXQ_V2_I]] int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) { @@ -7085,7 +7085,7 @@ int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vmaxq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VMAXQ_V2_I]] int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) { @@ -7093,7 +7093,7 @@ int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) { } // CHECK-LABEL: @test_vmaxq_u8( -// CHECK: [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VMAXQ_V_I]] uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) { return vmaxq_u8(a, b); @@ -7102,7 +7102,7 @@ uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) { // CHECK-LABEL: @test_vmaxq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VMAXQ_V2_I]] uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) { @@ -7112,7 +7112,7 @@ uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vmaxq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VMAXQ_V2_I]] uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) { @@ -7122,7 +7122,7 @@ uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vmaxq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %b) #4 +// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %b) // CHECK: [[VMAXQ_V3_I:%.*]] = 
bitcast <4 x float> [[VMAXQ_V2_I]] to <16 x i8> // CHECK: ret <4 x float> [[VMAXQ_V2_I]] float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) { @@ -7130,7 +7130,7 @@ float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) { } // CHECK-LABEL: @test_vmin_s8( -// CHECK: [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VMIN_V_I]] int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) { return vmin_s8(a, b); @@ -7139,7 +7139,7 @@ int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vmin_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VMIN_V2_I]] int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) { @@ -7149,7 +7149,7 @@ int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vmin_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VMIN_V2_I]] int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) { @@ -7157,7 +7157,7 @@ int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: @test_vmin_u8( -// CHECK: [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VMIN_V_I]] uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) { return vmin_u8(a, b); @@ -7166,7 +7166,7 @@ uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vmin_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VMIN_V2_I]] uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) { @@ -7176,7 +7176,7 @@ uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vmin_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VMIN_V2_I]] uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) { @@ -7186,7 +7186,7 @@ uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) { // CHECK-LABEL: @test_vmin_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x 
float> %b to <8 x i8> -// CHECK: [[VMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %a, <2 x float> %b) #4 +// CHECK: [[VMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %a, <2 x float> %b) // CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x float> [[VMIN_V2_I]] to <8 x i8> // CHECK: ret <2 x float> [[VMIN_V2_I]] float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) { @@ -7194,7 +7194,7 @@ float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) { } // CHECK-LABEL: @test_vminq_s8( -// CHECK: [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VMINQ_V_I]] int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) { return vminq_s8(a, b); @@ -7203,7 +7203,7 @@ int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vminq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VMINQ_V2_I]] int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) { @@ -7213,7 +7213,7 @@ int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vminq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VMINQ_V2_I]] int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) { @@ -7221,7 +7221,7 @@ int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) { } // CHECK-LABEL: @test_vminq_u8( -// CHECK: [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VMINQ_V_I]] uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) { return vminq_u8(a, b); @@ -7230,7 +7230,7 @@ uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) { // CHECK-LABEL: @test_vminq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VMINQ_V2_I]] uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) { @@ -7240,7 +7240,7 @@ uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vminq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VMINQ_V3_I:%.*]] = 
bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VMINQ_V2_I]] uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) { @@ -7250,7 +7250,7 @@ uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vminq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %a, <4 x float> %b) #4 +// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %a, <4 x float> %b) // CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x float> [[VMINQ_V2_I]] to <16 x i8> // CHECK: ret <4 x float> [[VMINQ_V2_I]] float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) { @@ -7370,7 +7370,7 @@ uint32x4_t test_vmlaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) { } // CHECK-LABEL: @test_vmlal_s8( -// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c) #4 +// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c) // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]] // CHECK: ret <8 x i16> [[ADD_I]] int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) { @@ -7380,7 +7380,7 @@ int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) { // CHECK-LABEL: @test_vmlal_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c) // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { @@ -7390,7 +7390,7 @@ int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { // CHECK-LABEL: @test_vmlal_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c) // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[ADD_I]] int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { @@ -7398,7 +7398,7 @@ int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { } // CHECK-LABEL: @test_vmlal_u8( -// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c) #4 +// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c) // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]] // CHECK: ret <8 x i16> [[ADD_I]] uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { @@ -7408,7 +7408,7 @@ uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { // CHECK-LABEL: @test_vmlal_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c) // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) 
{ @@ -7418,7 +7418,7 @@ uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { // CHECK-LABEL: @test_vmlal_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c) // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[ADD_I]] uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { @@ -7429,7 +7429,7 @@ uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { @@ -7440,7 +7440,7 @@ int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { @@ -7451,7 +7451,7 @@ int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] uint32x4_t test_vmlal_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { @@ -7462,7 +7462,7 @@ uint32x4_t test_vmlal_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] uint64x2_t test_vmlal_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { @@ -7476,7 +7476,7 @@ uint64x2_t test_vmlal_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 // CHECK: [[TMP0:%.*]] = 
bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { @@ -7488,7 +7488,7 @@ int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[ADD_I]] int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { @@ -7502,7 +7502,7 @@ int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) { @@ -7514,7 +7514,7 @@ uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) { // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[ADD_I]] uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) { @@ -7846,7 +7846,7 @@ uint32x4_t test_vmlsq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) { } // CHECK-LABEL: @test_vmlsl_s8( -// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c) #4 +// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c) // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]] // CHECK: ret <8 x i16> [[SUB_I]] int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) { @@ -7856,7 +7856,7 @@ int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) { // CHECK-LABEL: @test_vmlsl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c) // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, 
[[VMULL2_I_I]] // CHECK: ret <4 x i32> [[SUB_I]] int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { @@ -7866,7 +7866,7 @@ int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { // CHECK-LABEL: @test_vmlsl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c) // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[SUB_I]] int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { @@ -7874,7 +7874,7 @@ int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { } // CHECK-LABEL: @test_vmlsl_u8( -// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c) #4 +// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c) // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]] // CHECK: ret <8 x i16> [[SUB_I]] uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { @@ -7884,7 +7884,7 @@ uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { // CHECK-LABEL: @test_vmlsl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c) // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[SUB_I]] uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { @@ -7894,7 +7894,7 @@ uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { // CHECK-LABEL: @test_vmlsl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c) // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[SUB_I]] uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { @@ -7905,7 +7905,7 @@ uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { @@ -7916,7 +7916,7 @@ int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <2 
x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { @@ -7927,7 +7927,7 @@ int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] uint32x4_t test_vmlsl_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { @@ -7938,7 +7938,7 @@ uint32x4_t test_vmlsl_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] uint64x2_t test_vmlsl_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { @@ -7952,7 +7952,7 @@ uint64x2_t test_vmlsl_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[SUB_I]] int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { @@ -7964,7 +7964,7 @@ int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[SUB_I]] int64x2_t test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { @@ -7978,7 +7978,7 @@ int64x2_t test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) // CHECK: [[SUB_I:%.*]] = 
sub <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[SUB_I]] uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) { @@ -7990,7 +7990,7 @@ uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) { // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[SUB_I]] uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) { @@ -8696,7 +8696,7 @@ uint32x4_t test_vmulq_u32(uint32x4_t a, uint32x4_t b) { } // CHECK-LABEL: @test_vmull_s8( -// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i16> [[VMULL_I]] int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) { return vmull_s8(a, b); @@ -8705,7 +8705,7 @@ int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vmull_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %b) // CHECK: ret <4 x i32> [[VMULL2_I]] int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) { return vmull_s16(a, b); @@ -8714,14 +8714,14 @@ int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vmull_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %b) // CHECK: ret <2 x i64> [[VMULL2_I]] int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) { return vmull_s32(a, b); } // CHECK-LABEL: @test_vmull_u8( -// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i16> [[VMULL_I]] uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) { return vmull_u8(a, b); @@ -8730,7 +8730,7 @@ uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vmull_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %b) // CHECK: ret <4 x i32> [[VMULL2_I]] uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) { return vmull_u16(a, b); @@ -8739,14 +8739,14 @@ uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vmull_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x 
i32> %b) #4 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %b) // CHECK: ret <2 x i64> [[VMULL2_I]] uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) { return vmull_u32(a, b); } // CHECK-LABEL: @test_vmull_p8( -// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i16> [[VMULL_I]] poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) { return vmull_p8(a, b); @@ -8756,7 +8756,7 @@ poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: ret <4 x i32> [[VMULL2_I]] int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t b) { return vmull_lane_s16(a, b, 3); @@ -8766,7 +8766,7 @@ int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t b) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: ret <2 x i64> [[VMULL2_I]] int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t b) { return vmull_lane_s32(a, b, 1); @@ -8776,7 +8776,7 @@ int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t b) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: ret <4 x i32> [[VMULL2_I]] uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t b) { return vmull_lane_u16(a, b, 3); @@ -8786,7 +8786,7 @@ uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t b) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: ret <2 x i64> [[VMULL2_I]] uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t b) { return vmull_lane_u32(a, b, 1); @@ -8799,7 +8799,7 @@ uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t b) { // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) 
#4 +// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) // CHECK: ret <4 x i32> [[VMULL5_I]] int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) { return vmull_n_s16(a, b); @@ -8810,7 +8810,7 @@ int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) { // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) #4 +// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) // CHECK: ret <2 x i64> [[VMULL3_I]] int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) { return vmull_n_s32(a, b); @@ -8823,7 +8823,7 @@ int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) { // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) #4 +// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) // CHECK: ret <4 x i32> [[VMULL5_I]] uint32x4_t test_vmull_n_u16(uint16x4_t a, uint16_t b) { return vmull_n_u16(a, b); @@ -8834,21 +8834,21 @@ uint32x4_t test_vmull_n_u16(uint16x4_t a, uint16_t b) { // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) #4 +// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) // CHECK: ret <2 x i64> [[VMULL3_I]] uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) { return vmull_n_u32(a, b); } // CHECK-LABEL: @test_vmul_p8( -// CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VMUL_V_I]] poly8x8_t test_vmul_p8(poly8x8_t a, poly8x8_t b) { return vmul_p8(a, b); } // CHECK-LABEL: @test_vmulq_p8( -// CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VMULQ_V_I]] poly8x16_t test_vmulq_p8(poly8x16_t a, poly8x16_t b) { return vmulq_p8(a, b); @@ -9442,7 +9442,7 @@ uint64x2_t test_vorrq_u64(uint64x2_t a, uint64x2_t b) { // CHECK-LABEL: @test_vpadal_s8( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b) #4 +// CHECK: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b) // CHECK: ret <4 x i16> [[VPADAL_V1_I]] int16x4_t test_vpadal_s8(int16x4_t a, int8x8_t b) { return vpadal_s8(a, b); @@ -9451,7 +9451,7 @@ int16x4_t test_vpadal_s8(int16x4_t a, int8x8_t b) { // CHECK-LABEL: @test_vpadal_s16( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a 
to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b) #4 +// CHECK: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b) // CHECK: ret <2 x i32> [[VPADAL_V2_I]] int32x2_t test_vpadal_s16(int32x2_t a, int16x4_t b) { return vpadal_s16(a, b); @@ -9460,7 +9460,7 @@ int32x2_t test_vpadal_s16(int32x2_t a, int16x4_t b) { // CHECK-LABEL: @test_vpadal_s32( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b) #4 +// CHECK: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b) // CHECK: ret <1 x i64> [[VPADAL_V2_I]] int64x1_t test_vpadal_s32(int64x1_t a, int32x2_t b) { return vpadal_s32(a, b); @@ -9468,7 +9468,7 @@ int64x1_t test_vpadal_s32(int64x1_t a, int32x2_t b) { // CHECK-LABEL: @test_vpadal_u8( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b) #4 +// CHECK: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b) // CHECK: ret <4 x i16> [[VPADAL_V1_I]] uint16x4_t test_vpadal_u8(uint16x4_t a, uint8x8_t b) { return vpadal_u8(a, b); @@ -9477,7 +9477,7 @@ uint16x4_t test_vpadal_u8(uint16x4_t a, uint8x8_t b) { // CHECK-LABEL: @test_vpadal_u16( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b) #4 +// CHECK: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b) // CHECK: ret <2 x i32> [[VPADAL_V2_I]] uint32x2_t test_vpadal_u16(uint32x2_t a, uint16x4_t b) { return vpadal_u16(a, b); @@ -9486,7 +9486,7 @@ uint32x2_t test_vpadal_u16(uint32x2_t a, uint16x4_t b) { // CHECK-LABEL: @test_vpadal_u32( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b) #4 +// CHECK: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b) // CHECK: ret <1 x i64> [[VPADAL_V2_I]] uint64x1_t test_vpadal_u32(uint64x1_t a, uint32x2_t b) { return vpadal_u32(a, b); @@ -9494,7 +9494,7 @@ uint64x1_t test_vpadal_u32(uint64x1_t a, uint32x2_t b) { // CHECK-LABEL: @test_vpadalq_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b) #4 +// CHECK: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b) // CHECK: ret <8 x i16> [[VPADALQ_V1_I]] int16x8_t test_vpadalq_s8(int16x8_t a, int8x16_t b) { return vpadalq_s8(a, b); @@ -9503,7 +9503,7 @@ int16x8_t test_vpadalq_s8(int16x8_t a, int8x16_t b) { // CHECK-LABEL: @test_vpadalq_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b) #4 +// CHECK: [[VPADALQ_V2_I:%.*]] = call <4 x i32> 
@llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b) // CHECK: ret <4 x i32> [[VPADALQ_V2_I]] int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) { return vpadalq_s16(a, b); @@ -9512,7 +9512,7 @@ int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) { // CHECK-LABEL: @test_vpadalq_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b) #4 +// CHECK: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b) // CHECK: ret <2 x i64> [[VPADALQ_V2_I]] int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) { return vpadalq_s32(a, b); @@ -9520,7 +9520,7 @@ int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) { // CHECK-LABEL: @test_vpadalq_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b) #4 +// CHECK: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b) // CHECK: ret <8 x i16> [[VPADALQ_V1_I]] uint16x8_t test_vpadalq_u8(uint16x8_t a, uint8x16_t b) { return vpadalq_u8(a, b); @@ -9529,7 +9529,7 @@ uint16x8_t test_vpadalq_u8(uint16x8_t a, uint8x16_t b) { // CHECK-LABEL: @test_vpadalq_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b) #4 +// CHECK: [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b) // CHECK: ret <4 x i32> [[VPADALQ_V2_I]] uint32x4_t test_vpadalq_u16(uint32x4_t a, uint16x8_t b) { return vpadalq_u16(a, b); @@ -9538,14 +9538,14 @@ uint32x4_t test_vpadalq_u16(uint32x4_t a, uint16x8_t b) { // CHECK-LABEL: @test_vpadalq_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b) #4 +// CHECK: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b) // CHECK: ret <2 x i64> [[VPADALQ_V2_I]] uint64x2_t test_vpadalq_u32(uint64x2_t a, uint32x4_t b) { return vpadalq_u32(a, b); } // CHECK-LABEL: @test_vpadd_s8( -// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VPADD_V_I]] int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) { return vpadd_s8(a, b); @@ -9554,7 +9554,7 @@ int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vpadd_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VPADD_V2_I]] int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) { @@ -9564,7 +9564,7 @@ int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vpadd_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x 
i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VPADD_V2_I]] int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) { @@ -9572,7 +9572,7 @@ int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: @test_vpadd_u8( -// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VPADD_V_I]] uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) { return vpadd_u8(a, b); @@ -9581,7 +9581,7 @@ uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vpadd_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VPADD_V2_I]] uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) { @@ -9591,7 +9591,7 @@ uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vpadd_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VPADD_V2_I]] uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) { @@ -9601,7 +9601,7 @@ uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) { // CHECK-LABEL: @test_vpadd_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %a, <2 x float> %b) #4 +// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %a, <2 x float> %b) // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8> // CHECK: ret <2 x float> [[VPADD_V2_I]] float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) { @@ -9609,7 +9609,7 @@ float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) { } // CHECK-LABEL: @test_vpaddl_s8( -// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %a) #4 +// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %a) // CHECK: ret <4 x i16> [[VPADDL_I]] int16x4_t test_vpaddl_s8(int8x8_t a) { return vpaddl_s8(a); @@ -9617,7 +9617,7 @@ int16x4_t test_vpaddl_s8(int8x8_t a) { // CHECK-LABEL: @test_vpaddl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %a) #4 +// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %a) // CHECK: ret <2 x i32> [[VPADDL1_I]] int32x2_t test_vpaddl_s16(int16x4_t a) { return vpaddl_s16(a); @@ -9625,14 
+9625,14 @@ int32x2_t test_vpaddl_s16(int16x4_t a) { // CHECK-LABEL: @test_vpaddl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %a) #4 +// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %a) // CHECK: ret <1 x i64> [[VPADDL1_I]] int64x1_t test_vpaddl_s32(int32x2_t a) { return vpaddl_s32(a); } // CHECK-LABEL: @test_vpaddl_u8( -// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %a) #4 +// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %a) // CHECK: ret <4 x i16> [[VPADDL_I]] uint16x4_t test_vpaddl_u8(uint8x8_t a) { return vpaddl_u8(a); @@ -9640,7 +9640,7 @@ uint16x4_t test_vpaddl_u8(uint8x8_t a) { // CHECK-LABEL: @test_vpaddl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %a) #4 +// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %a) // CHECK: ret <2 x i32> [[VPADDL1_I]] uint32x2_t test_vpaddl_u16(uint16x4_t a) { return vpaddl_u16(a); @@ -9648,14 +9648,14 @@ uint32x2_t test_vpaddl_u16(uint16x4_t a) { // CHECK-LABEL: @test_vpaddl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %a) #4 +// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %a) // CHECK: ret <1 x i64> [[VPADDL1_I]] uint64x1_t test_vpaddl_u32(uint32x2_t a) { return vpaddl_u32(a); } // CHECK-LABEL: @test_vpaddlq_s8( -// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %a) #4 +// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %a) // CHECK: ret <8 x i16> [[VPADDL_I]] int16x8_t test_vpaddlq_s8(int8x16_t a) { return vpaddlq_s8(a); @@ -9663,7 +9663,7 @@ int16x8_t test_vpaddlq_s8(int8x16_t a) { // CHECK-LABEL: @test_vpaddlq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %a) #4 +// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %a) // CHECK: ret <4 x i32> [[VPADDL1_I]] int32x4_t test_vpaddlq_s16(int16x8_t a) { return vpaddlq_s16(a); @@ -9671,14 +9671,14 @@ int32x4_t test_vpaddlq_s16(int16x8_t a) { // CHECK-LABEL: @test_vpaddlq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %a) #4 +// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %a) // CHECK: ret <2 x i64> [[VPADDL1_I]] int64x2_t test_vpaddlq_s32(int32x4_t a) { return vpaddlq_s32(a); } // CHECK-LABEL: @test_vpaddlq_u8( -// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %a) #4 +// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %a) // CHECK: ret <8 x i16> [[VPADDL_I]] uint16x8_t test_vpaddlq_u8(uint8x16_t a) { return vpaddlq_u8(a); @@ -9686,7 +9686,7 @@ uint16x8_t test_vpaddlq_u8(uint8x16_t a) { // CHECK-LABEL: @test_vpaddlq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %a) #4 +// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> 
@llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %a) // CHECK: ret <4 x i32> [[VPADDL1_I]] uint32x4_t test_vpaddlq_u16(uint16x8_t a) { return vpaddlq_u16(a); @@ -9694,14 +9694,14 @@ uint32x4_t test_vpaddlq_u16(uint16x8_t a) { // CHECK-LABEL: @test_vpaddlq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %a) #4 +// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %a) // CHECK: ret <2 x i64> [[VPADDL1_I]] uint64x2_t test_vpaddlq_u32(uint32x4_t a) { return vpaddlq_u32(a); } // CHECK-LABEL: @test_vpmax_s8( -// CHECK: [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VPMAX_V_I]] int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) { return vpmax_s8(a, b); @@ -9710,7 +9710,7 @@ int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vpmax_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VPMAX_V2_I]] int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) { @@ -9720,7 +9720,7 @@ int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vpmax_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VPMAX_V2_I]] int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) { @@ -9728,7 +9728,7 @@ int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: @test_vpmax_u8( -// CHECK: [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VPMAX_V_I]] uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) { return vpmax_u8(a, b); @@ -9737,7 +9737,7 @@ uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vpmax_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VPMAX_V2_I]] uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) { @@ -9747,7 +9747,7 @@ uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vpmax_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x 
i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VPMAX_V2_I]] uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) { @@ -9757,7 +9757,7 @@ uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) { // CHECK-LABEL: @test_vpmax_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %a, <2 x float> %b) #4 +// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %a, <2 x float> %b) // CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x float> [[VPMAX_V2_I]] to <8 x i8> // CHECK: ret <2 x float> [[VPMAX_V2_I]] float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) { @@ -9765,7 +9765,7 @@ float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) { } // CHECK-LABEL: @test_vpmin_s8( -// CHECK: [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VPMIN_V_I]] int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) { return vpmin_s8(a, b); @@ -9774,7 +9774,7 @@ int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vpmin_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VPMIN_V2_I]] int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) { @@ -9784,7 +9784,7 @@ int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vpmin_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VPMIN_V2_I]] int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) { @@ -9792,7 +9792,7 @@ int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: @test_vpmin_u8( -// CHECK: [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VPMIN_V_I]] uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) { return vpmin_u8(a, b); @@ -9801,7 +9801,7 @@ uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vpmin_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VPMIN_V2_I]] uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) { @@ -9811,7 +9811,7 @@ uint16x4_t 
test_vpmin_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vpmin_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VPMIN_V2_I]] uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) { @@ -9821,7 +9821,7 @@ uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) { // CHECK-LABEL: @test_vpmin_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %a, <2 x float> %b) #4 +// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %a, <2 x float> %b) // CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x float> [[VPMIN_V2_I]] to <8 x i8> // CHECK: ret <2 x float> [[VPMIN_V2_I]] float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) { @@ -9829,7 +9829,7 @@ float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) { } // CHECK-LABEL: @test_vqabs_s8( -// CHECK: [[VQABS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %a) #4 +// CHECK: [[VQABS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %a) // CHECK: ret <8 x i8> [[VQABS_V_I]] int8x8_t test_vqabs_s8(int8x8_t a) { return vqabs_s8(a); @@ -9837,7 +9837,7 @@ int8x8_t test_vqabs_s8(int8x8_t a) { // CHECK-LABEL: @test_vqabs_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %a) #4 +// CHECK: [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %a) // CHECK: [[VQABS_V2_I:%.*]] = bitcast <4 x i16> [[VQABS_V1_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQABS_V1_I]] int16x4_t test_vqabs_s16(int16x4_t a) { @@ -9846,7 +9846,7 @@ int16x4_t test_vqabs_s16(int16x4_t a) { // CHECK-LABEL: @test_vqabs_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %a) #4 +// CHECK: [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %a) // CHECK: [[VQABS_V2_I:%.*]] = bitcast <2 x i32> [[VQABS_V1_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQABS_V1_I]] int32x2_t test_vqabs_s32(int32x2_t a) { @@ -9854,7 +9854,7 @@ int32x2_t test_vqabs_s32(int32x2_t a) { } // CHECK-LABEL: @test_vqabsq_s8( -// CHECK: [[VQABSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %a) #4 +// CHECK: [[VQABSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %a) // CHECK: ret <16 x i8> [[VQABSQ_V_I]] int8x16_t test_vqabsq_s8(int8x16_t a) { return vqabsq_s8(a); @@ -9862,7 +9862,7 @@ int8x16_t test_vqabsq_s8(int8x16_t a) { // CHECK-LABEL: @test_vqabsq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %a) #4 +// CHECK: [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %a) // CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <8 x i16> [[VQABSQ_V1_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQABSQ_V1_I]] int16x8_t test_vqabsq_s16(int16x8_t a) { @@ -9871,7 +9871,7 @@ int16x8_t test_vqabsq_s16(int16x8_t a) { // CHECK-LABEL: @test_vqabsq_s32( // CHECK: [[TMP0:%.*]] = 
bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %a) #4 +// CHECK: [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %a) // CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <4 x i32> [[VQABSQ_V1_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQABSQ_V1_I]] int32x4_t test_vqabsq_s32(int32x4_t a) { @@ -9879,7 +9879,7 @@ int32x4_t test_vqabsq_s32(int32x4_t a) { } // CHECK-LABEL: @test_vqadd_s8( -// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQADD_V_I]] int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) { return vqadd_s8(a, b); @@ -9888,7 +9888,7 @@ int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vqadd_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQADD_V2_I]] int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) { @@ -9898,7 +9898,7 @@ int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vqadd_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQADD_V2_I]] int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) { @@ -9908,7 +9908,7 @@ int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vqadd_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQADD_V2_I]] int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) { @@ -9916,7 +9916,7 @@ int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vqadd_u8( -// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQADD_V_I]] uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) { return vqadd_u8(a, b); @@ -9925,7 +9925,7 @@ uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vqadd_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQADD_V2_I]] 
uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) { @@ -9935,7 +9935,7 @@ uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vqadd_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQADD_V2_I]] uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) { @@ -9945,7 +9945,7 @@ uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) { // CHECK-LABEL: @test_vqadd_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQADD_V2_I]] uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) { @@ -9953,7 +9953,7 @@ uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) { } // CHECK-LABEL: @test_vqaddq_s8( -// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQADDQ_V_I]] int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) { return vqaddq_s8(a, b); @@ -9962,7 +9962,7 @@ int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vqaddq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQADDQ_V2_I]] int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) { @@ -9972,7 +9972,7 @@ int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vqaddq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQADDQ_V2_I]] int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) { @@ -9982,7 +9982,7 @@ int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vqaddq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQADDQ_V2_I]] int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) { @@ -9990,7 
+9990,7 @@ int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vqaddq_u8( -// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQADDQ_V_I]] uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) { return vqaddq_u8(a, b); @@ -9999,7 +9999,7 @@ uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) { // CHECK-LABEL: @test_vqaddq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQADDQ_V2_I]] uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) { @@ -10009,7 +10009,7 @@ uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vqaddq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQADDQ_V2_I]] uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) { @@ -10019,7 +10019,7 @@ uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vqaddq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQADDQ_V2_I]] uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) { @@ -10030,8 +10030,8 @@ uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) #4 -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vqdmlal_s16(a, b, c); @@ -10041,8 +10041,8 @@ int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) #4 -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> 
@llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vqdmlal_s32(a, b, c); @@ -10053,8 +10053,8 @@ int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #4 -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vqdmlal_lane_s16(a, b, c, 3); @@ -10065,8 +10065,8 @@ int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #4 -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vqdmlal_lane_s32(a, b, c, 1); @@ -10080,8 +10080,8 @@ int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #4 -// CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]]) #4 +// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) +// CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]]) // CHECK: ret <4 x i32> [[VQDMLAL_V6_I]] int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { return vqdmlal_n_s16(a, b, c); @@ -10093,8 +10093,8 @@ int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> 
@llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #4 -// CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]]) #4 +// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) +// CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]]) // CHECK: ret <2 x i64> [[VQDMLAL_V4_I]] int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { return vqdmlal_n_s32(a, b, c); @@ -10104,8 +10104,8 @@ int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) #4 -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vqdmlsl_s16(a, b, c); @@ -10115,8 +10115,8 @@ int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) #4 -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vqdmlsl_s32(a, b, c); @@ -10127,8 +10127,8 @@ int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #4 -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vqdmlsl_lane_s16(a, b, c, 3); @@ -10139,8 +10139,8 @@ int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> 
[[SHUFFLE]]) #4 -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vqdmlsl_lane_s32(a, b, c, 1); @@ -10154,8 +10154,8 @@ int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #4 -// CHECK: [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]]) #4 +// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) +// CHECK: [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]]) // CHECK: ret <4 x i32> [[VQDMLSL_V6_I]] int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { return vqdmlsl_n_s16(a, b, c); @@ -10167,8 +10167,8 @@ int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #4 -// CHECK: [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]]) #4 +// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) +// CHECK: [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]]) // CHECK: ret <2 x i64> [[VQDMLSL_V4_I]] int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { return vqdmlsl_n_s32(a, b, c); @@ -10177,7 +10177,7 @@ int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { // CHECK-LABEL: @test_vqdmulh_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQDMULH_V2_I]] int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) { @@ -10187,7 +10187,7 @@ int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vqdmulh_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x 
i8> // CHECK: ret <2 x i32> [[VQDMULH_V2_I]] int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) { @@ -10197,7 +10197,7 @@ int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vqdmulhq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]] int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) { @@ -10207,7 +10207,7 @@ int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vqdmulhq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]] int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) { @@ -10218,7 +10218,7 @@ int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #4 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQDMULH_V2_I]] int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t b) { @@ -10229,7 +10229,7 @@ int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t b) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #4 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQDMULH_V2_I]] int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t b) { @@ -10240,7 +10240,7 @@ int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t b) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <8 x i32> // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) #4 +// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]] int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t b) { @@ -10251,7 +10251,7 @@ int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, 
int16x4_t b) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) #4 +// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]] int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t b) { @@ -10265,7 +10265,7 @@ int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t b) { // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VQDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) #4 +// CHECK: [[VQDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) // CHECK: [[VQDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V5_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQDMULH_V5_I]] int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) { @@ -10277,7 +10277,7 @@ int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) { // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VQDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) #4 +// CHECK: [[VQDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) // CHECK: [[VQDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V3_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQDMULH_V3_I]] int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) { @@ -10295,7 +10295,7 @@ int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) { // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8> -// CHECK: [[VQDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]]) #4 +// CHECK: [[VQDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]]) // CHECK: [[VQDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V9_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQDMULHQ_V9_I]] int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) { @@ -10309,7 +10309,7 @@ int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) { // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8> -// CHECK: [[VQDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]]) #4 +// CHECK: [[VQDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]]) // CHECK: [[VQDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V5_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULHQ_V5_I]] int32x4_t 
test_vqdmulhq_n_s32(int32x4_t a, int32_t b) { @@ -10319,7 +10319,7 @@ int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) { // CHECK-LABEL: @test_vqdmull_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULL_V2_I]] int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) { @@ -10329,7 +10329,7 @@ int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vqdmull_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQDMULL_V2_I]] int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) { @@ -10340,7 +10340,7 @@ int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #4 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULL_V2_I]] int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t b) { @@ -10351,7 +10351,7 @@ int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t b) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #4 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQDMULL_V2_I]] int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t b) { @@ -10365,7 +10365,7 @@ int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t b) { // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VQDMULL_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) #4 +// CHECK: [[VQDMULL_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) // CHECK: [[VQDMULL_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V5_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQDMULL_V5_I]] int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) { @@ -10377,7 +10377,7 @@ int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) { // CHECK: [[VECINIT_I:%.*]] = 
insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VQDMULL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) #4 +// CHECK: [[VQDMULL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) // CHECK: [[VQDMULL_V4_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V3_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQDMULL_V3_I]] int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) { @@ -10386,7 +10386,7 @@ int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) { // CHECK-LABEL: @test_vqmovn_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %a) #4 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %a) // CHECK: ret <8 x i8> [[VQMOVN_V1_I]] int8x8_t test_vqmovn_s16(int16x8_t a) { return vqmovn_s16(a); @@ -10394,7 +10394,7 @@ int8x8_t test_vqmovn_s16(int16x8_t a) { // CHECK-LABEL: @test_vqmovn_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %a) #4 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %a) // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQMOVN_V1_I]] int16x4_t test_vqmovn_s32(int32x4_t a) { @@ -10403,7 +10403,7 @@ int16x4_t test_vqmovn_s32(int32x4_t a) { // CHECK-LABEL: @test_vqmovn_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %a) #4 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %a) // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQMOVN_V1_I]] int32x2_t test_vqmovn_s64(int64x2_t a) { @@ -10412,7 +10412,7 @@ int32x2_t test_vqmovn_s64(int64x2_t a) { // CHECK-LABEL: @test_vqmovn_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %a) #4 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %a) // CHECK: ret <8 x i8> [[VQMOVN_V1_I]] uint8x8_t test_vqmovn_u16(uint16x8_t a) { return vqmovn_u16(a); @@ -10420,7 +10420,7 @@ uint8x8_t test_vqmovn_u16(uint16x8_t a) { // CHECK-LABEL: @test_vqmovn_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %a) #4 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %a) // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQMOVN_V1_I]] uint16x4_t test_vqmovn_u32(uint32x4_t a) { @@ -10429,7 +10429,7 @@ uint16x4_t test_vqmovn_u32(uint32x4_t a) { // CHECK-LABEL: @test_vqmovn_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %a) #4 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %a) // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQMOVN_V1_I]] uint32x2_t test_vqmovn_u64(uint64x2_t a) { @@ -10438,7 +10438,7 @@ 
uint32x2_t test_vqmovn_u64(uint64x2_t a) { // CHECK-LABEL: @test_vqmovun_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %a) #4 +// CHECK: [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %a) // CHECK: ret <8 x i8> [[VQMOVUN_V1_I]] uint8x8_t test_vqmovun_s16(int16x8_t a) { return vqmovun_s16(a); @@ -10446,7 +10446,7 @@ uint8x8_t test_vqmovun_s16(int16x8_t a) { // CHECK-LABEL: @test_vqmovun_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %a) #4 +// CHECK: [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %a) // CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQMOVUN_V1_I]] uint16x4_t test_vqmovun_s32(int32x4_t a) { @@ -10455,7 +10455,7 @@ uint16x4_t test_vqmovun_s32(int32x4_t a) { // CHECK-LABEL: @test_vqmovun_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %a) #4 +// CHECK: [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %a) // CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQMOVUN_V1_I]] uint32x2_t test_vqmovun_s64(int64x2_t a) { @@ -10463,7 +10463,7 @@ uint32x2_t test_vqmovun_s64(int64x2_t a) { } // CHECK-LABEL: @test_vqneg_s8( -// CHECK: [[VQNEG_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %a) #4 +// CHECK: [[VQNEG_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %a) // CHECK: ret <8 x i8> [[VQNEG_V_I]] int8x8_t test_vqneg_s8(int8x8_t a) { return vqneg_s8(a); @@ -10471,7 +10471,7 @@ int8x8_t test_vqneg_s8(int8x8_t a) { // CHECK-LABEL: @test_vqneg_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %a) #4 +// CHECK: [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %a) // CHECK: [[VQNEG_V2_I:%.*]] = bitcast <4 x i16> [[VQNEG_V1_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQNEG_V1_I]] int16x4_t test_vqneg_s16(int16x4_t a) { @@ -10480,7 +10480,7 @@ int16x4_t test_vqneg_s16(int16x4_t a) { // CHECK-LABEL: @test_vqneg_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %a) #4 +// CHECK: [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %a) // CHECK: [[VQNEG_V2_I:%.*]] = bitcast <2 x i32> [[VQNEG_V1_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQNEG_V1_I]] int32x2_t test_vqneg_s32(int32x2_t a) { @@ -10488,7 +10488,7 @@ int32x2_t test_vqneg_s32(int32x2_t a) { } // CHECK-LABEL: @test_vqnegq_s8( -// CHECK: [[VQNEGQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %a) #4 +// CHECK: [[VQNEGQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %a) // CHECK: ret <16 x i8> [[VQNEGQ_V_I]] int8x16_t test_vqnegq_s8(int8x16_t a) { return vqnegq_s8(a); @@ -10496,7 +10496,7 @@ int8x16_t test_vqnegq_s8(int8x16_t a) { // CHECK-LABEL: @test_vqnegq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %a) #4 +// CHECK: [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %a) // CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <8 
x i16> [[VQNEGQ_V1_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQNEGQ_V1_I]] int16x8_t test_vqnegq_s16(int16x8_t a) { @@ -10505,7 +10505,7 @@ int16x8_t test_vqnegq_s16(int16x8_t a) { // CHECK-LABEL: @test_vqnegq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %a) #4 +// CHECK: [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %a) // CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <4 x i32> [[VQNEGQ_V1_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQNEGQ_V1_I]] int32x4_t test_vqnegq_s32(int32x4_t a) { @@ -10515,7 +10515,7 @@ int32x4_t test_vqnegq_s32(int32x4_t a) { // CHECK-LABEL: @test_vqrdmulh_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQRDMULH_V2_I]] int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) { @@ -10525,7 +10525,7 @@ int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vqrdmulh_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQRDMULH_V2_I]] int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) { @@ -10535,7 +10535,7 @@ int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vqrdmulhq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]] int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) { @@ -10545,7 +10545,7 @@ int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vqrdmulhq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]] int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) { @@ -10556,7 +10556,7 @@ int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #4 
+// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQRDMULH_V2_I]] int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t b) { @@ -10567,7 +10567,7 @@ int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t b) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #4 +// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQRDMULH_V2_I]] int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t b) { @@ -10578,7 +10578,7 @@ int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t b) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <8 x i32> // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) #4 +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]] int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t b) { @@ -10589,7 +10589,7 @@ int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t b) { // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) #4 +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]] int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t b) { @@ -10603,7 +10603,7 @@ int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t b) { // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VQRDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) #4 +// CHECK: [[VQRDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) // CHECK: [[VQRDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V5_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQRDMULH_V5_I]] int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) { @@ -10615,7 +10615,7 @@ int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) { // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VQRDMULH_V3_I:%.*]] = call 
<2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) #4 +// CHECK: [[VQRDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) // CHECK: [[VQRDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V3_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQRDMULH_V3_I]] int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) { @@ -10633,7 +10633,7 @@ int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) { // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8> -// CHECK: [[VQRDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]]) #4 +// CHECK: [[VQRDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]]) // CHECK: [[VQRDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V9_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQRDMULHQ_V9_I]] int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) { @@ -10647,7 +10647,7 @@ int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) { // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8> -// CHECK: [[VQRDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]]) #4 +// CHECK: [[VQRDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]]) // CHECK: [[VQRDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V5_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQRDMULHQ_V5_I]] int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) { @@ -10655,7 +10655,7 @@ int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) { } // CHECK-LABEL: @test_vqrshl_s8( -// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQRSHL_V_I]] int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) { return vqrshl_s8(a, b); @@ -10664,7 +10664,7 @@ int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vqrshl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQRSHL_V2_I]] int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) { @@ -10674,7 +10674,7 @@ int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vqrshl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQRSHL_V2_I]] int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) { @@ 
-10684,7 +10684,7 @@ int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vqrshl_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQRSHL_V2_I]] int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) { @@ -10692,7 +10692,7 @@ int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vqrshl_u8( -// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQRSHL_V_I]] uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) { return vqrshl_u8(a, b); @@ -10701,7 +10701,7 @@ uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vqrshl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQRSHL_V2_I]] uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) { @@ -10711,7 +10711,7 @@ uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vqrshl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQRSHL_V2_I]] uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) { @@ -10721,7 +10721,7 @@ uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vqrshl_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQRSHL_V2_I]] uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) { @@ -10729,7 +10729,7 @@ uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vqrshlq_s8( -// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQRSHLQ_V_I]] int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) { return vqrshlq_s8(a, b); @@ -10738,7 +10738,7 @@ int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vqrshlq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] 
= bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]] int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) { @@ -10748,7 +10748,7 @@ int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vqrshlq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]] int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) { @@ -10758,7 +10758,7 @@ int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vqrshlq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]] int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) { @@ -10766,7 +10766,7 @@ int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vqrshlq_u8( -// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQRSHLQ_V_I]] uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) { return vqrshlq_u8(a, b); @@ -10775,7 +10775,7 @@ uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vqrshlq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]] uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) { @@ -10785,7 +10785,7 @@ uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vqrshlq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]] uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) { @@ -10795,7 +10795,7 @@ uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vqrshlq_u64( // CHECK: [[TMP0:%.*]] = bitcast 
<2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]] uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) { @@ -10884,7 +10884,7 @@ uint32x2_t test_vqrshrun_n_s64(int64x2_t a) { } // CHECK-LABEL: @test_vqshl_s8( -// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQSHL_V_I]] int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) { return vqshl_s8(a, b); @@ -10893,7 +10893,7 @@ int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vqshl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQSHL_V2_I]] int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) { @@ -10903,7 +10903,7 @@ int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vqshl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQSHL_V2_I]] int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) { @@ -10913,7 +10913,7 @@ int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vqshl_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQSHL_V2_I]] int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) { @@ -10921,7 +10921,7 @@ int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vqshl_u8( -// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQSHL_V_I]] uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) { return vqshl_u8(a, b); @@ -10930,7 +10930,7 @@ uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vqshl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x 
i16> %a, <4 x i16> %b) // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQSHL_V2_I]] uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) { @@ -10940,7 +10940,7 @@ uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vqshl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQSHL_V2_I]] uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) { @@ -10950,7 +10950,7 @@ uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vqshl_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQSHL_V2_I]] uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) { @@ -10958,7 +10958,7 @@ uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vqshlq_s8( -// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQSHLQ_V_I]] int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) { return vqshlq_s8(a, b); @@ -10967,7 +10967,7 @@ int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vqshlq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQSHLQ_V2_I]] int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) { @@ -10977,7 +10977,7 @@ int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vqshlq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQSHLQ_V2_I]] int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) { @@ -10987,7 +10987,7 @@ int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vqshlq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: 
[[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQSHLQ_V2_I]] int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) { @@ -10995,7 +10995,7 @@ int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vqshlq_u8( -// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQSHLQ_V_I]] uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) { return vqshlq_u8(a, b); @@ -11004,7 +11004,7 @@ uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vqshlq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQSHLQ_V2_I]] uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) { @@ -11014,7 +11014,7 @@ uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vqshlq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQSHLQ_V2_I]] uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) { @@ -11024,7 +11024,7 @@ uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vqshlq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQSHLQ_V2_I]] uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) { @@ -11317,7 +11317,7 @@ uint32x2_t test_vqshrun_n_s64(int64x2_t a) { } // CHECK-LABEL: @test_vqsub_s8( -// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQSUB_V_I]] int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) { return vqsub_s8(a, b); @@ -11326,7 +11326,7 @@ int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vqsub_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQSUB_V2_I]] int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) { @@ -11336,7 +11336,7 @@ int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t 
b) { // CHECK-LABEL: @test_vqsub_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQSUB_V2_I]] int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) { @@ -11346,7 +11346,7 @@ int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vqsub_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQSUB_V2_I]] int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) { @@ -11354,7 +11354,7 @@ int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vqsub_u8( -// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQSUB_V_I]] uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) { return vqsub_u8(a, b); @@ -11363,7 +11363,7 @@ uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vqsub_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQSUB_V2_I]] uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) { @@ -11373,7 +11373,7 @@ uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vqsub_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQSUB_V2_I]] uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) { @@ -11383,7 +11383,7 @@ uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) { // CHECK-LABEL: @test_vqsub_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQSUB_V2_I]] uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) { @@ -11391,7 +11391,7 @@ uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) { } // CHECK-LABEL: @test_vqsubq_s8( -// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> 
@llvm.arm.neon.vqsubs.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQSUBQ_V_I]] int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) { return vqsubq_s8(a, b); @@ -11400,7 +11400,7 @@ int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vqsubq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQSUBQ_V2_I]] int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) { @@ -11410,7 +11410,7 @@ int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vqsubq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQSUBQ_V2_I]] int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) { @@ -11420,7 +11420,7 @@ int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vqsubq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQSUBQ_V2_I]] int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) { @@ -11428,7 +11428,7 @@ int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vqsubq_u8( -// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQSUBQ_V_I]] uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) { return vqsubq_u8(a, b); @@ -11437,7 +11437,7 @@ uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) { // CHECK-LABEL: @test_vqsubq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQSUBQ_V2_I]] uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) { @@ -11447,7 +11447,7 @@ uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vqsubq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = 
call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQSUBQ_V2_I]] uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) { @@ -11457,7 +11457,7 @@ uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vqsubq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQSUBQ_V2_I]] uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) { @@ -11467,7 +11467,7 @@ uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) { // CHECK-LABEL: @test_vraddhn_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b) // CHECK: ret <8 x i8> [[VRADDHN_V2_I]] int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) { return vraddhn_s16(a, b); @@ -11476,7 +11476,7 @@ int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vraddhn_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VRADDHN_V2_I]] int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) { @@ -11486,7 +11486,7 @@ int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vraddhn_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VRADDHN_V2_I]] int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) { @@ -11496,7 +11496,7 @@ int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) { // CHECK-LABEL: @test_vraddhn_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b) // CHECK: ret <8 x i8> [[VRADDHN_V2_I]] uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) { return vraddhn_u16(a, b); @@ -11505,7 +11505,7 @@ uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vraddhn_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> 
@llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VRADDHN_V2_I]] uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) { @@ -11515,7 +11515,7 @@ uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vraddhn_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VRADDHN_V2_I]] uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) { @@ -11524,7 +11524,7 @@ uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) { // CHECK-LABEL: @test_vrecpe_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %a) #4 +// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %a) // CHECK: ret <2 x float> [[VRECPE_V1_I]] float32x2_t test_vrecpe_f32(float32x2_t a) { return vrecpe_f32(a); @@ -11532,7 +11532,7 @@ float32x2_t test_vrecpe_f32(float32x2_t a) { // CHECK-LABEL: @test_vrecpe_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %a) #4 +// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %a) // CHECK: ret <2 x i32> [[VRECPE_V1_I]] uint32x2_t test_vrecpe_u32(uint32x2_t a) { return vrecpe_u32(a); @@ -11540,7 +11540,7 @@ uint32x2_t test_vrecpe_u32(uint32x2_t a) { // CHECK-LABEL: @test_vrecpeq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %a) #4 +// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %a) // CHECK: ret <4 x float> [[VRECPEQ_V1_I]] float32x4_t test_vrecpeq_f32(float32x4_t a) { return vrecpeq_f32(a); @@ -11548,7 +11548,7 @@ float32x4_t test_vrecpeq_f32(float32x4_t a) { // CHECK-LABEL: @test_vrecpeq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %a) #4 +// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %a) // CHECK: ret <4 x i32> [[VRECPEQ_V1_I]] uint32x4_t test_vrecpeq_u32(uint32x4_t a) { return vrecpeq_u32(a); @@ -11557,7 +11557,7 @@ uint32x4_t test_vrecpeq_u32(uint32x4_t a) { // CHECK-LABEL: @test_vrecps_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %a, <2 x float> %b) #4 +// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %a, <2 x float> %b) // CHECK: [[VRECPS_V3_I:%.*]] = bitcast <2 x float> [[VRECPS_V2_I]] to <8 x i8> // CHECK: ret <2 x float> [[VRECPS_V2_I]] float32x2_t test_vrecps_f32(float32x2_t a, float32x2_t b) { @@ -11567,7 +11567,7 @@ float32x2_t test_vrecps_f32(float32x2_t a, float32x2_t b) { // CHECK-LABEL: 
@test_vrecpsq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %a, <4 x float> %b) #4 +// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %a, <4 x float> %b) // CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8> // CHECK: ret <4 x float> [[VRECPSQ_V2_I]] float32x4_t test_vrecpsq_f32(float32x4_t a, float32x4_t b) { @@ -13643,7 +13643,7 @@ float32x4_t test_vrev64q_f32(float32x4_t a) { } // CHECK-LABEL: @test_vrhadd_s8( -// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VRHADD_V_I]] int8x8_t test_vrhadd_s8(int8x8_t a, int8x8_t b) { return vrhadd_s8(a, b); @@ -13652,7 +13652,7 @@ int8x8_t test_vrhadd_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vrhadd_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VRHADD_V2_I]] int16x4_t test_vrhadd_s16(int16x4_t a, int16x4_t b) { @@ -13662,7 +13662,7 @@ int16x4_t test_vrhadd_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vrhadd_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VRHADD_V2_I]] int32x2_t test_vrhadd_s32(int32x2_t a, int32x2_t b) { @@ -13670,7 +13670,7 @@ int32x2_t test_vrhadd_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: @test_vrhadd_u8( -// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VRHADD_V_I]] uint8x8_t test_vrhadd_u8(uint8x8_t a, uint8x8_t b) { return vrhadd_u8(a, b); @@ -13679,7 +13679,7 @@ uint8x8_t test_vrhadd_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vrhadd_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VRHADD_V2_I]] uint16x4_t test_vrhadd_u16(uint16x4_t a, uint16x4_t b) { @@ -13689,7 +13689,7 @@ uint16x4_t test_vrhadd_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vrhadd_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x 
i32> %a, <2 x i32> %b) #4 +// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VRHADD_V2_I]] uint32x2_t test_vrhadd_u32(uint32x2_t a, uint32x2_t b) { @@ -13697,7 +13697,7 @@ uint32x2_t test_vrhadd_u32(uint32x2_t a, uint32x2_t b) { } // CHECK-LABEL: @test_vrhaddq_s8( -// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VRHADDQ_V_I]] int8x16_t test_vrhaddq_s8(int8x16_t a, int8x16_t b) { return vrhaddq_s8(a, b); @@ -13706,7 +13706,7 @@ int8x16_t test_vrhaddq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vrhaddq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VRHADDQ_V2_I]] int16x8_t test_vrhaddq_s16(int16x8_t a, int16x8_t b) { @@ -13716,7 +13716,7 @@ int16x8_t test_vrhaddq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vrhaddq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VRHADDQ_V2_I]] int32x4_t test_vrhaddq_s32(int32x4_t a, int32x4_t b) { @@ -13724,7 +13724,7 @@ int32x4_t test_vrhaddq_s32(int32x4_t a, int32x4_t b) { } // CHECK-LABEL: @test_vrhaddq_u8( -// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VRHADDQ_V_I]] uint8x16_t test_vrhaddq_u8(uint8x16_t a, uint8x16_t b) { return vrhaddq_u8(a, b); @@ -13733,7 +13733,7 @@ uint8x16_t test_vrhaddq_u8(uint8x16_t a, uint8x16_t b) { // CHECK-LABEL: @test_vrhaddq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VRHADDQ_V2_I]] uint16x8_t test_vrhaddq_u16(uint16x8_t a, uint16x8_t b) { @@ -13743,7 +13743,7 @@ uint16x8_t test_vrhaddq_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vrhaddq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: 
[[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VRHADDQ_V2_I]] uint32x4_t test_vrhaddq_u32(uint32x4_t a, uint32x4_t b) { @@ -13751,7 +13751,7 @@ uint32x4_t test_vrhaddq_u32(uint32x4_t a, uint32x4_t b) { } // CHECK-LABEL: @test_vrshl_s8( -// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VRSHL_V_I]] int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) { return vrshl_s8(a, b); @@ -13760,7 +13760,7 @@ int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vrshl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VRSHL_V2_I]] int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) { @@ -13770,7 +13770,7 @@ int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vrshl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VRSHL_V2_I]] int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) { @@ -13780,7 +13780,7 @@ int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vrshl_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VRSHL_V2_I]] int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) { @@ -13788,7 +13788,7 @@ int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vrshl_u8( -// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VRSHL_V_I]] uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) { return vrshl_u8(a, b); @@ -13797,7 +13797,7 @@ uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vrshl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VRSHL_V2_I]] uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) { @@ -13807,7 +13807,7 @@ uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) { // CHECK-LABEL: 
@test_vrshl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VRSHL_V2_I]] uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) { @@ -13817,7 +13817,7 @@ uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vrshl_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VRSHL_V2_I]] uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) { @@ -13825,7 +13825,7 @@ uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vrshlq_s8( -// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VRSHLQ_V_I]] int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) { return vrshlq_s8(a, b); @@ -13834,7 +13834,7 @@ int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vrshlq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VRSHLQ_V2_I]] int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) { @@ -13844,7 +13844,7 @@ int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vrshlq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VRSHLQ_V2_I]] int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) { @@ -13854,7 +13854,7 @@ int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vrshlq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VRSHLQ_V2_I]] int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) { @@ -13862,7 +13862,7 @@ int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vrshlq_u8( -// CHECK: 
[[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VRSHLQ_V_I]] uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) { return vrshlq_u8(a, b); @@ -13871,7 +13871,7 @@ uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vrshlq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VRSHLQ_V2_I]] uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) { @@ -13881,7 +13881,7 @@ uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vrshlq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VRSHLQ_V2_I]] uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) { @@ -13891,7 +13891,7 @@ uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vrshlq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VRSHLQ_V2_I]] uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) { @@ -14090,7 +14090,7 @@ uint64x2_t test_vrshrq_n_u64(uint64x2_t a) { // CHECK-LABEL: @test_vrsqrte_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %a) #4 +// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %a) // CHECK: ret <2 x float> [[VRSQRTE_V1_I]] float32x2_t test_vrsqrte_f32(float32x2_t a) { return vrsqrte_f32(a); @@ -14098,7 +14098,7 @@ float32x2_t test_vrsqrte_f32(float32x2_t a) { // CHECK-LABEL: @test_vrsqrte_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %a) #4 +// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %a) // CHECK: ret <2 x i32> [[VRSQRTE_V1_I]] uint32x2_t test_vrsqrte_u32(uint32x2_t a) { return vrsqrte_u32(a); @@ -14106,7 +14106,7 @@ uint32x2_t test_vrsqrte_u32(uint32x2_t a) { // CHECK-LABEL: @test_vrsqrteq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %a) #4 +// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %a) // CHECK: ret <4 x float> [[VRSQRTEQ_V1_I]] float32x4_t 
test_vrsqrteq_f32(float32x4_t a) { return vrsqrteq_f32(a); @@ -14114,7 +14114,7 @@ float32x4_t test_vrsqrteq_f32(float32x4_t a) { // CHECK-LABEL: @test_vrsqrteq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %a) #4 +// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %a) // CHECK: ret <4 x i32> [[VRSQRTEQ_V1_I]] uint32x4_t test_vrsqrteq_u32(uint32x4_t a) { return vrsqrteq_u32(a); @@ -14123,7 +14123,7 @@ uint32x4_t test_vrsqrteq_u32(uint32x4_t a) { // CHECK-LABEL: @test_vrsqrts_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %a, <2 x float> %b) #4 +// CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %a, <2 x float> %b) // CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8> // CHECK: ret <2 x float> [[VRSQRTS_V2_I]] float32x2_t test_vrsqrts_f32(float32x2_t a, float32x2_t b) { @@ -14133,7 +14133,7 @@ float32x2_t test_vrsqrts_f32(float32x2_t a, float32x2_t b) { // CHECK-LABEL: @test_vrsqrtsq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %a, <4 x float> %b) #4 +// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %a, <4 x float> %b) // CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8> // CHECK: ret <4 x float> [[VRSQRTSQ_V2_I]] float32x4_t test_vrsqrtsq_f32(float32x4_t a, float32x4_t b) { @@ -14319,7 +14319,7 @@ uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) { // CHECK-LABEL: @test_vrsubhn_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) // CHECK: ret <8 x i8> [[VRSUBHN_V2_I]] int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) { return vrsubhn_s16(a, b); @@ -14328,7 +14328,7 @@ int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vrsubhn_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VRSUBHN_V2_I]] int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) { @@ -14338,7 +14338,7 @@ int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vrsubhn_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8> // CHECK: ret 
<2 x i32> [[VRSUBHN_V2_I]] int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) { @@ -14348,7 +14348,7 @@ int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) { // CHECK-LABEL: @test_vrsubhn_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) // CHECK: ret <8 x i8> [[VRSUBHN_V2_I]] uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) { return vrsubhn_u16(a, b); @@ -14357,7 +14357,7 @@ uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vrsubhn_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VRSUBHN_V2_I]] uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) { @@ -14367,7 +14367,7 @@ uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vrsubhn_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VRSUBHN_V2_I]] uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) { @@ -14605,7 +14605,7 @@ uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) { } // CHECK-LABEL: @test_vshl_s8( -// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VSHL_V_I]] int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) { return vshl_s8(a, b); @@ -14614,7 +14614,7 @@ int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vshl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VSHL_V2_I]] int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) { @@ -14624,7 +14624,7 @@ int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vshl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VSHL_V2_I]] int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) { @@ -14634,7 +14634,7 @@ int32x2_t 
test_vshl_s32(int32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vshl_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VSHL_V2_I]] int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) { @@ -14642,7 +14642,7 @@ int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vshl_u8( -// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VSHL_V_I]] uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) { return vshl_u8(a, b); @@ -14651,7 +14651,7 @@ uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vshl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VSHL_V2_I]] uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) { @@ -14661,7 +14661,7 @@ uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vshl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VSHL_V2_I]] uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) { @@ -14671,7 +14671,7 @@ uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vshl_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %a, <1 x i64> %b) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VSHL_V2_I]] uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) { @@ -14679,7 +14679,7 @@ uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vshlq_s8( -// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VSHLQ_V_I]] int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) { return vshlq_s8(a, b); @@ -14688,7 +14688,7 @@ int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vshlq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// 
CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VSHLQ_V2_I]] int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) { @@ -14698,7 +14698,7 @@ int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vshlq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VSHLQ_V2_I]] int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) { @@ -14708,7 +14708,7 @@ int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vshlq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VSHLQ_V2_I]] int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) { @@ -14716,7 +14716,7 @@ int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vshlq_u8( -// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 +// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VSHLQ_V_I]] uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) { return vshlq_u8(a, b); @@ -14725,7 +14725,7 @@ uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vshlq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 +// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VSHLQ_V2_I]] uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) { @@ -14735,7 +14735,7 @@ uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vshlq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 +// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VSHLQ_V2_I]] uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) { @@ -14745,7 +14745,7 @@ uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vshlq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %a, <2 x i64> %b) #4 +// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> 
@llvm.arm.neon.vshiftu.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VSHLQ_V2_I]] uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) { @@ -19937,21 +19937,21 @@ uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) { } // CHECK-LABEL: @test_vtbl1_u8( -// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VTBL1_I]] uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) { return vtbl1_u8(a, b); } // CHECK-LABEL: @test_vtbl1_s8( -// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VTBL1_I]] int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) { return vtbl1_s8(a, b); } // CHECK-LABEL: @test_vtbl1_p8( -// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b) #4 +// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VTBL1_I]] poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) { return vtbl1_p8(a, b); @@ -19975,7 +19975,7 @@ poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) { // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 -// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b) #4 +// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b) // CHECK: ret <8 x i8> [[VTBL2_I]] uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) { return vtbl2_u8(a, b); @@ -19999,7 +19999,7 @@ uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) { // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 -// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b) #4 +// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b) // CHECK: ret <8 x i8> [[VTBL2_I]] int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) { return vtbl2_s8(a, b); @@ -20023,7 +20023,7 @@ int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) { // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 -// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b) #4 +// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b) // CHECK: ret <8 x i8> [[VTBL2_I]] poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) { return vtbl2_p8(a, b); @@ -20050,7 +20050,7 @@ poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) { // CHECK: 
[[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 -// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b) #4 +// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b) // CHECK: ret <8 x i8> [[VTBL3_I]] uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) { return vtbl3_u8(a, b); @@ -20077,7 +20077,7 @@ uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) { // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 -// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b) #4 +// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b) // CHECK: ret <8 x i8> [[VTBL3_I]] int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) { return vtbl3_s8(a, b); @@ -20104,7 +20104,7 @@ int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) { // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 -// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b) #4 +// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b) // CHECK: ret <8 x i8> [[VTBL3_I]] poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) { return vtbl3_p8(a, b); @@ -20134,7 +20134,7 @@ poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) { // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 -// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b) #4 +// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b) // CHECK: ret <8 x i8> [[VTBL4_I]] uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) { return vtbl4_u8(a, b); @@ -20164,7 +20164,7 @@ uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) { // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 -// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b) #4 +// CHECK: 
[[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b) // CHECK: ret <8 x i8> [[VTBL4_I]] int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) { return vtbl4_s8(a, b); @@ -20194,28 +20194,28 @@ int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) { // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 -// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b) #4 +// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b) // CHECK: ret <8 x i8> [[VTBL4_I]] poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) { return vtbl4_p8(a, b); } // CHECK-LABEL: @test_vtbx1_u8( -// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4 +// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) // CHECK: ret <8 x i8> [[VTBX1_I]] uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) { return vtbx1_u8(a, b, c); } // CHECK-LABEL: @test_vtbx1_s8( -// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4 +// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) // CHECK: ret <8 x i8> [[VTBX1_I]] int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) { return vtbx1_s8(a, b, c); } // CHECK-LABEL: @test_vtbx1_p8( -// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4 +// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) // CHECK: ret <8 x i8> [[VTBX1_I]] poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) { return vtbx1_p8(a, b, c); @@ -20239,7 +20239,7 @@ poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) { // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 -// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c) #4 +// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c) // CHECK: ret <8 x i8> [[VTBX2_I]] uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) { return vtbx2_u8(a, b, c); @@ -20263,7 +20263,7 @@ uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) { // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 -// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c) #4 +// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> 
[[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c) // CHECK: ret <8 x i8> [[VTBX2_I]] int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) { return vtbx2_s8(a, b, c); @@ -20287,7 +20287,7 @@ int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) { // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 -// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c) #4 +// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c) // CHECK: ret <8 x i8> [[VTBX2_I]] poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) { return vtbx2_p8(a, b, c); @@ -20314,7 +20314,7 @@ poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) { // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 -// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c) #4 +// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c) // CHECK: ret <8 x i8> [[VTBX3_I]] uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) { return vtbx3_u8(a, b, c); @@ -20341,7 +20341,7 @@ uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) { // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 -// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c) #4 +// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c) // CHECK: ret <8 x i8> [[VTBX3_I]] int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) { return vtbx3_s8(a, b, c); @@ -20368,7 +20368,7 @@ int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) { // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 -// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c) #4 +// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c) // CHECK: ret <8 x i8> [[VTBX3_I]] poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) { return vtbx3_p8(a, b, c); @@ -20398,7 +20398,7 @@ poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) { // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds 
%struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 -// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c) #4 +// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c) // CHECK: ret <8 x i8> [[VTBX4_I]] uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) { return vtbx4_u8(a, b, c); @@ -20428,7 +20428,7 @@ uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) { // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 -// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c) #4 +// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c) // CHECK: ret <8 x i8> [[VTBX4_I]] int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) { return vtbx4_s8(a, b, c); @@ -20458,7 +20458,7 @@ int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) { // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 -// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c) #4 +// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c) // CHECK: ret <8 x i8> [[VTBX4_I]] poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) { return vtbx4_p8(a, b, c); @@ -20475,7 +20475,7 @@ poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) { // CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], !noalias !3 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) // CHECK: ret void int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) { return vtrn_s8(a, b); @@ -20494,7 +20494,7 @@ int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) { // CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], !noalias !6 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) // CHECK: ret void int16x4x2_t 
test_vtrn_s16(int16x4_t a, int16x4_t b) { return vtrn_s16(a, b); @@ -20513,7 +20513,7 @@ int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) { // CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]], !noalias !9 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) // CHECK: ret void int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) { return vtrn_s32(a, b); @@ -20530,7 +20530,7 @@ int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) { // CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], !noalias !12 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) // CHECK: ret void uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) { return vtrn_u8(a, b); @@ -20549,7 +20549,7 @@ uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) { // CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], !noalias !15 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) // CHECK: ret void uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) { return vtrn_u16(a, b); @@ -20568,7 +20568,7 @@ uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) { // CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]], !noalias !18 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) // CHECK: ret void uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) { return vtrn_u32(a, b); @@ -20587,7 +20587,7 @@ uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) { // CHECK: store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP4]], !noalias !21 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) // CHECK: ret void float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) { return vtrn_f32(a, b); @@ -20604,7 +20604,7 @@ float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) { // CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], !noalias !24 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* 
[[TMP4]], i32 16, i32 8, i1 false) // CHECK: ret void poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) { return vtrn_p8(a, b); @@ -20623,7 +20623,7 @@ poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) { // CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], !noalias !27 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) // CHECK: ret void poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) { return vtrn_p16(a, b); @@ -20640,7 +20640,7 @@ poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) { // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !noalias !30 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) // CHECK: ret void int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) { return vtrnq_s8(a, b); @@ -20659,7 +20659,7 @@ int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) { // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !noalias !33 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) { return vtrnq_s16(a, b); @@ -20678,7 +20678,7 @@ int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) { // CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], !noalias !36 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) { return vtrnq_s32(a, b); @@ -20695,7 +20695,7 @@ int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) { // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !noalias !39 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) // CHECK: ret void uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) { return vtrnq_u8(a, b); @@ -20714,7 +20714,7 @@ uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) { // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !noalias !42 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) { return vtrnq_u16(a, b); @@ -20733,7 +20733,7 @@ uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) { // CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], !noalias !45 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) { return vtrnq_u32(a, b); @@ -20752,7 +20752,7 @@ uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) { // CHECK: store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP4]], !noalias !48 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) { return vtrnq_f32(a, b); @@ -20769,7 +20769,7 @@ float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) { // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !noalias !51 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) // CHECK: ret void poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) { return vtrnq_p8(a, b); @@ -20788,7 +20788,7 @@ poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) { // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !noalias !54 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) { return vtrnq_p16(a, b); @@ -20969,7 +20969,7 @@ uint16x8_t test_vtstq_p16(poly16x8_t a, poly16x8_t b) { // CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], !noalias !57 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) // CHECK: ret void int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) { return vuzp_s8(a, b); @@ -20988,7 +20988,7 @@ int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) { // CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], !noalias !60 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* -// CHECK: 
call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) // CHECK: ret void int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) { return vuzp_s16(a, b); @@ -21007,7 +21007,7 @@ int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) { // CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]], !noalias !63 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) // CHECK: ret void int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) { return vuzp_s32(a, b); @@ -21024,7 +21024,7 @@ int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) { // CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], !noalias !66 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) // CHECK: ret void uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) { return vuzp_u8(a, b); @@ -21043,7 +21043,7 @@ uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) { // CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], !noalias !69 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) // CHECK: ret void uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) { return vuzp_u16(a, b); @@ -21062,7 +21062,7 @@ uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) { // CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]], !noalias !72 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) // CHECK: ret void uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) { return vuzp_u32(a, b); @@ -21081,7 +21081,7 @@ uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) { // CHECK: store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP4]], !noalias !75 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) // CHECK: ret void float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) { return vuzp_f32(a, b); @@ -21098,7 +21098,7 @@ float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) { // CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], !noalias !78 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8* // CHECK: 
[[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) // CHECK: ret void poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) { return vuzp_p8(a, b); @@ -21117,7 +21117,7 @@ poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) { // CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], !noalias !81 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) // CHECK: ret void poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) { return vuzp_p16(a, b); @@ -21134,7 +21134,7 @@ poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) { // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !noalias !84 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) // CHECK: ret void int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) { return vuzpq_s8(a, b); @@ -21153,7 +21153,7 @@ int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) { // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !noalias !87 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) { return vuzpq_s16(a, b); @@ -21172,7 +21172,7 @@ int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) { // CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], !noalias !90 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) { return vuzpq_s32(a, b); @@ -21189,7 +21189,7 @@ int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) { // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !noalias !93 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) // CHECK: ret void uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) { return vuzpq_u8(a, b); @@ -21208,7 +21208,7 @@ uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) { // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !noalias !96 // CHECK: [[TMP5:%.*]] 
= bitcast %struct.uint16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) { return vuzpq_u16(a, b); @@ -21227,7 +21227,7 @@ uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) { // CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], !noalias !99 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) { return vuzpq_u32(a, b); @@ -21246,7 +21246,7 @@ uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) { // CHECK: store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP4]], !noalias !102 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) { return vuzpq_f32(a, b); @@ -21263,7 +21263,7 @@ float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) { // CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !noalias !105 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) // CHECK: ret void poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) { return vuzpq_p8(a, b); @@ -21282,7 +21282,7 @@ poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) { // CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !noalias !108 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) { return vuzpq_p16(a, b); @@ -21299,7 +21299,7 @@ poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) { // CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], !noalias !111 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) // CHECK: ret void int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) { return vzip_s8(a, b); @@ -21318,7 +21318,7 @@ int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) 
{ // CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], !noalias !114 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) // CHECK: ret void int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) { return vzip_s16(a, b); @@ -21337,7 +21337,7 @@ int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) { // CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]], !noalias !117 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) // CHECK: ret void int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) { return vzip_s32(a, b); @@ -21354,7 +21354,7 @@ int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) { // CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], !noalias !120 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) // CHECK: ret void uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) { return vzip_u8(a, b); @@ -21373,7 +21373,7 @@ uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) { // CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], !noalias !123 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) // CHECK: ret void uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) { return vzip_u16(a, b); @@ -21392,7 +21392,7 @@ uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) { // CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]], !noalias !126 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) // CHECK: ret void uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) { return vzip_u32(a, b); @@ -21411,7 +21411,7 @@ uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) { // CHECK: store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP4]], !noalias !129 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) // CHECK: ret void float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) { return vzip_f32(a, b); @@ -21428,7 
+21428,7 @@ float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) { // CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], !noalias !132 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) // CHECK: ret void poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) { return vzip_p8(a, b); @@ -21447,7 +21447,7 @@ poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) { // CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], !noalias !135 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) // CHECK: ret void poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) { return vzip_p16(a, b); @@ -21464,7 +21464,7 @@ poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) { // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !noalias !138 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) // CHECK: ret void int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) { return vzipq_s8(a, b); @@ -21483,7 +21483,7 @@ int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) { // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !noalias !141 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) { return vzipq_s16(a, b); @@ -21502,7 +21502,7 @@ int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) { // CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], !noalias !144 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) { return vzipq_s32(a, b); @@ -21519,7 +21519,7 @@ int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) { // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !noalias !147 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) // CHECK: ret void uint8x16x2_t 
test_vzipq_u8(uint8x16_t a, uint8x16_t b) { return vzipq_u8(a, b); @@ -21538,7 +21538,7 @@ uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) { // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !noalias !150 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) { return vzipq_u16(a, b); @@ -21557,7 +21557,7 @@ uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) { // CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], !noalias !153 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) { return vzipq_u32(a, b); @@ -21576,7 +21576,7 @@ uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) { // CHECK: store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP4]], !noalias !156 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) { return vzipq_f32(a, b); @@ -21593,7 +21593,7 @@ float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) { // CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !noalias !159 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) // CHECK: ret void poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) { return vzipq_p8(a, b); @@ -21612,7 +21612,7 @@ poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) { // CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !noalias !162 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* // CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) // CHECK: ret void poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) { return vzipq_p16(a, b); diff --git a/test/CodeGen/libcalls.c b/test/CodeGen/libcalls.c index 8120b75256231..3a8207b2bebc3 100644 --- a/test/CodeGen/libcalls.c +++ b/test/CodeGen/libcalls.c @@ -124,7 +124,7 @@ void test_builtins(double d, float f, long double ld) { // CHECK-YES-NOT: declare float @logf(float) [[NUW_RN]] } -// CHECK-YES: attributes [[NUW_RN]] = { nounwind readnone } +// CHECK-YES: attributes [[NUW_RN]] = { nounwind readnone 
speculatable } // CHECK-NO: attributes [[NUW_RN]] = { nounwind readnone{{.*}} } -// CHECK-NO: attributes [[NUW_RNI]] = { nounwind readnone } +// CHECK-NO: attributes [[NUW_RNI]] = { nounwind readnone speculatable } diff --git a/test/Driver/arch-specific-libdir-rpath.c b/test/Driver/arch-specific-libdir-rpath.c index 3bcf6e3d3f868..4b210f2cb412b 100644 --- a/test/Driver/arch-specific-libdir-rpath.c +++ b/test/Driver/arch-specific-libdir-rpath.c @@ -81,5 +81,5 @@ // RPATH-X86_64: "-rpath" "[[RESDIR]]{{(/|\\\\)lib(/|\\\\)linux(/|\\\\)x86_64}}" // LIBPATH-AArch64: -L[[RESDIR]]{{(/|\\\\)lib(/|\\\\)linux(/|\\\\)aarch64}} // RPATH-AArch64: "-rpath" "[[RESDIR]]{{(/|\\\\)lib(/|\\\\)linux(/|\\\\)aarch64}}" -// NO-LIBPATH-NOT: -L{{.*Inputs(/|\\\\)resource_dir}} +// NO-LIBPATH-NOT: "-L{{[^"]*Inputs(/|\\\\)resource_dir}}" // NO-RPATH-NOT: "-rpath" {{.*(/|\\\\)Inputs(/|\\\\)resource_dir}} diff --git a/test/Driver/fsanitize-coverage.c b/test/Driver/fsanitize-coverage.c index 6fa1daa4a63e3..22c26be99b696 100644 --- a/test/Driver/fsanitize-coverage.c +++ b/test/Driver/fsanitize-coverage.c @@ -4,12 +4,13 @@ // CHECK-SANITIZE-COVERAGE-0-NOT: fsanitize-coverage-type // CHECK-SANITIZE-COVERAGE-0: -fsanitize=address -// RUN: %clang -target x86_64-linux-gnu -fsanitize=address -fsanitize-coverage=func %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC -// RUN: %clang -target x86_64-linux-gnu -fsanitize=memory -fsanitize-coverage=func %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC -// RUN: %clang -target x86_64-linux-gnu -fsanitize=leak -fsanitize-coverage=func %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC -// RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined -fsanitize-coverage=func %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC -// RUN: %clang -target x86_64-linux-gnu -fsanitize=bool -fsanitize-coverage=func %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC -// RUN: %clang -target x86_64-linux-gnu -fsanitize=dataflow -fsanitize-coverage=func %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC +// RUN: %clang -target x86_64-linux-gnu -fsanitize=address -fsanitize-coverage=func,trace-pc %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC +// RUN: %clang -target x86_64-linux-gnu -fsanitize=memory -fsanitize-coverage=func,trace-pc %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC +// RUN: %clang -target x86_64-linux-gnu -fsanitize=leak -fsanitize-coverage=func,trace-pc %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC +// RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined -fsanitize-coverage=func,trace-pc %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC +// RUN: %clang -target x86_64-linux-gnu -fsanitize=bool -fsanitize-coverage=func,trace-pc %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC +// RUN: %clang -target x86_64-linux-gnu -fsanitize=dataflow -fsanitize-coverage=func,trace-pc %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC +// RUN: %clang -target x86_64-linux-gnu -fsanitize-coverage=func,trace-pc %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC // CHECK-SANITIZE-COVERAGE-FUNC: fsanitize-coverage-type=1 // RUN: %clang -target x86_64-linux-gnu -fsanitize=address -fsanitize-coverage=bb %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-BB @@ -25,13 +26,10 @@ // RUN: %clang -target 
x86_64-linux-gnu -fsanitize=address -fsanitize-coverage=1 %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-1 // CHECK-SANITIZE-COVERAGE-1: warning: argument '-fsanitize-coverage=1' is deprecated, use '-fsanitize-coverage=trace-pc-guard' instead -// RUN: %clang -target x86_64-linux-gnu -fsanitize=thread -fsanitize-coverage=func %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-UNUSED -// RUN: %clang -target x86_64-linux-gnu -fsanitize-coverage=func %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FUNC -// CHECK-SANITIZE-COVERAGE-UNUSED: argument unused during compilation: '-fsanitize-coverage=func' -// CHECK-SANITIZE-COVERAGE-UNUSED-NOT: -fsanitize-coverage-type=1 - -// RUN: %clang -target x86_64-linux-gnu -fsanitize=address -fsanitize-coverage=func -fno-sanitize=address %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-SAN-DISABLED -// CHECK-SANITIZE-COVERAGE-SAN-DISABLED-NOT: argument unused +// RUN: %clang -target x86_64-linux-gnu -fsanitize=address -fsanitize-coverage=func %s -### 2>&1 | FileCheck %s --check-prefix=CHECK_FUNC_BB_EDGE_DEPRECATED +// RUN: %clang -target x86_64-linux-gnu -fsanitize=address -fsanitize-coverage=bb %s -### 2>&1 | FileCheck %s --check-prefix=CHECK_FUNC_BB_EDGE_DEPRECATED +// RUN: %clang -target x86_64-linux-gnu -fsanitize=address -fsanitize-coverage=edge %s -### 2>&1 | FileCheck %s --check-prefix=CHECK_FUNC_BB_EDGE_DEPRECATED +// CHECK_FUNC_BB_EDGE_DEPRECATED: warning: argument '-fsanitize-coverage=[func|bb|edge]' is deprecated, use '-fsanitize-coverage=[func|bb|edge],[trace-pc-guard|trace-pc]' instead // RUN: %clang -target x86_64-linux-gnu -fsanitize=address -fsanitize-coverage=edge,indirect-calls,trace-pc,trace-cmp,trace-div,trace-gep %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANITIZE-COVERAGE-FEATURES // CHECK-SANITIZE-COVERAGE-FEATURES: -fsanitize-coverage-type=3 @@ -82,7 +80,7 @@ // CHECK-EXTEND-LEGACY: -fsanitize-coverage-type=1 // CHECK-EXTEND-LEGACY: -fsanitize-coverage-trace-cmp -// RUN: %clang_cl --target=i386-pc-win32 -fsanitize=address -fsanitize-coverage=func -c -### -- %s 2>&1 | FileCheck %s -check-prefix=CLANG-CL-COVERAGE +// RUN: %clang_cl --target=i386-pc-win32 -fsanitize=address -fsanitize-coverage=func,trace-pc-guard -c -### -- %s 2>&1 | FileCheck %s -check-prefix=CLANG-CL-COVERAGE // CLANG-CL-COVERAGE-NOT: error: // CLANG-CL-COVERAGE-NOT: warning: // CLANG-CL-COVERAGE-NOT: argument unused diff --git a/test/Index/keep-going.cpp b/test/Index/keep-going.cpp index 82987c6cf133c..dbfcad32d3d28 100644 --- a/test/Index/keep-going.cpp +++ b/test/Index/keep-going.cpp @@ -25,5 +25,5 @@ class C : public A { }; // CHECK: C++ base class specifier=A:4:7 [access=public isVirtual=false] [type=A] [typekind=Unexposed] [templateargs/1= [type=float] [typekind=Float]] [canonicaltype=A] [canonicaltypekind=Record] [canonicaltemplateargs/1= [type=float] [typekind=Float]] [isPOD=0] [nbFields=1] // CHECK: TemplateRef=A:4:7 [type=] [typekind=Invalid] [isPOD=0] -// CHECK-DIAG: keep-going.cpp:1:10: error: 'missing1.h' file not found -// CHECK-DIAG: keep-going.cpp:8:10: error: 'missing2.h' file not found +// CHECK-DIAG: keep-going.cpp:1:10: fatal error: 'missing1.h' file not found +// CHECK-DIAG: keep-going.cpp:8:10: fatal error: 'missing2.h' file not found diff --git a/test/Modules/diag-flags.cpp b/test/Modules/diag-flags.cpp index adbbd08ac8dce..31d2fe4439b8d 100644 --- a/test/Modules/diag-flags.cpp +++ b/test/Modules/diag-flags.cpp @@ -1,20 +1,42 @@ // RUN: rm -rf %t // -// RUN: %clang_cc1 
-triple x86_64-unknown-unknown -fmodules -fimplicit-module-maps -emit-module -fmodules-cache-path=%t -fmodule-name=diag_flags -x c++ %S/Inputs/module.map -fmodules-ts -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DIMPLICIT_FLAG -Werror=padded +// For an implicit module, all that matters are the warning flags in the user. +// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -emit-module -fmodules-cache-path=%t -fmodule-name=diag_flags -x c++ %S/Inputs/module.map -fmodules-ts +// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts +// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DWARNING -Wpadded +// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DERROR -Wpadded -Werror +// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DERROR -Werror=padded // -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fmodules -fimplicit-module-maps -emit-module -fmodule-name=diag_flags -x c++ %S/Inputs/module.map -fmodules-ts -o %t/explicit.pcm -Werror=string-plus-int -Wpadded -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DEXPLICIT_FLAG -fmodule-file=%t/explicit.pcm -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DEXPLICIT_FLAG -fmodule-file=%t/explicit.pcm -Werror=padded +// For an explicit module, all that matters are the warning flags in the module build. 
+// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -emit-module -fmodule-name=diag_flags -x c++ %S/Inputs/module.map -fmodules-ts -o %t/nodiag.pcm +// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -fmodule-file=%t/nodiag.pcm -Wpadded +// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -fmodule-file=%t/nodiag.pcm -Werror -Wpadded +// +// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -emit-module -fmodule-name=diag_flags -x c++ %S/Inputs/module.map -fmodules-ts -o %t/warning.pcm -Wpadded +// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DWARNING -fmodule-file=%t/warning.pcm +// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DWARNING -fmodule-file=%t/warning.pcm -Werror=padded +// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DWARNING -fmodule-file=%t/warning.pcm -Werror +// +// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -emit-module -fmodule-name=diag_flags -x c++ %S/Inputs/module.map -fmodules-ts -o %t/werror-no-error.pcm -Werror -Wpadded -Wno-error=padded +// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DWARNING -fmodule-file=%t/werror-no-error.pcm +// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DWARNING -fmodule-file=%t/werror-no-error.pcm -Wno-padded +// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DWARNING -fmodule-file=%t/werror-no-error.pcm -Werror=padded +// +// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -emit-module -fmodule-name=diag_flags -x c++ %S/Inputs/module.map -fmodules-ts -o %t/error.pcm -Werror=padded +// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DERROR -fmodule-file=%t/error.pcm -Wno-padded +// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DERROR -fmodule-file=%t/error.pcm -Wno-error=padded +// +// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -emit-module -fmodule-name=diag_flags -x c++ %S/Inputs/module.map -fmodules-ts -o %t/werror.pcm -Werror -Wpadded +// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DERROR -fmodule-file=%t/werror.pcm -Wno-error +// RUN: %clang_cc1 -triple %itanium_abi_triple -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -I %S/Inputs %s -fmodules-ts -DERROR -fmodule-file=%t/werror.pcm -Wno-padded import diag_flags; // Diagnostic flags from the module user make no difference to diagnostics // emitted within the module when using an explicitly-loaded module. 
-#ifdef IMPLICIT_FLAG +#if ERROR // expected-error@diag_flags.h:14 {{padding struct}} -#elif defined(EXPLICIT_FLAG) +#elif WARNING // expected-warning@diag_flags.h:14 {{padding struct}} #else // expected-no-diagnostics diff --git a/test/Modules/odr_hash.cpp b/test/Modules/odr_hash.cpp index e57c36f3864cf..294e925627c6e 100644 --- a/test/Modules/odr_hash.cpp +++ b/test/Modules/odr_hash.cpp @@ -275,6 +275,33 @@ S11 s11; // expected-note@first.h:* {{but in 'FirstModule' found field 'x' with a different initializer}} #endif +#if defined(FIRST) +struct S12 { + unsigned x[5]; +}; +#elif defined(SECOND) +struct S12 { + unsigned x[7]; +}; +#else +S12 s12; +// expected-error@first.h:* {{'Field::S12::x' from module 'FirstModule' is not present in definition of 'Field::S12' in module 'SecondModule'}} +// expected-note@second.h:* {{declaration of 'x' does not match}} +#endif + +#if defined(FIRST) +struct S13 { + unsigned x[7]; +}; +#elif defined(SECOND) +struct S13 { + double x[7]; +}; +#else +S13 s13; +// expected-error@first.h:* {{'Field::S13::x' from module 'FirstModule' is not present in definition of 'Field::S13' in module 'SecondModule'}} +// expected-note@second.h:* {{declaration of 'x' does not match}} +#endif } // namespace Field namespace Method { @@ -403,6 +430,91 @@ S8 s8; // expected-note@first.h:* {{but in 'FirstModule' found method 'A' is const}} #endif +#if defined(FIRST) +struct S9 { + void A(int x) {} + void A(int x, int y) {} +}; +#elif defined(SECOND) +struct S9 { + void A(int x, int y) {} + void A(int x) {} +}; +#else +S9 s9; +// expected-error@second.h:* {{'Method::S9' has different definitions in different modules; first difference is definition in module 'SecondModule' found method 'A' that has 2 parameters}} +// expected-note@first.h:* {{but in 'FirstModule' found method 'A' that has 1 parameter}} +#endif + +#if defined(FIRST) +struct S10 { + void A(int x) {} + void A(float x) {} +}; +#elif defined(SECOND) +struct S10 { + void A(float x) {} + void A(int x) {} +}; +#else +S10 s10; +// expected-error@second.h:* {{'Method::S10' has different definitions in different modules; first difference is definition in module 'SecondModule' found method 'A' with 1st parameter of type 'float'}} +// expected-note@first.h:* {{but in 'FirstModule' found method 'A' with 1st parameter of type 'int'}} +#endif + +#if defined(FIRST) +struct S11 { + void A(int x) {} +}; +#elif defined(SECOND) +struct S11 { + void A(int y) {} +}; +#else +S11 s11; +// expected-error@second.h:* {{'Method::S11' has different definitions in different modules; first difference is definition in module 'SecondModule' found method 'A' with 1st parameter named 'y'}} +// expected-note@first.h:* {{but in 'FirstModule' found method 'A' with 1st parameter named 'x'}} +#endif + +#if defined(FIRST) +struct S12 { + void A(int x) {} +}; +#elif defined(SECOND) +struct S12 { + void A(int x = 1) {} +}; +#else +S12 s12; +// TODO: This should produce an error. +#endif + +#if defined(FIRST) +struct S13 { + void A(int x = 1 + 0) {} +}; +#elif defined(SECOND) +struct S13 { + void A(int x = 1) {} +}; +#else +S13 s13; +// TODO: This should produce an error. 
+#endif + +#if defined(FIRST) +struct S14 { + void A(int x[2]) {} +}; +#elif defined(SECOND) +struct S14 { + void A(int x[3]) {} +}; +#else +S14 s14; +// expected-error@second.h:* {{'Method::S14' has different definitions in different modules; first difference is definition in module 'SecondModule' found method 'A' with 1st parameter of type 'int *' decayed from 'int [3]'}} +// expected-note@first.h:* {{but in 'FirstModule' found method 'A' with 1st parameter of type 'int *' decayed from 'int [2]'}} +#endif } // namespace Method // Naive parsing of AST can lead to cycles in processing. Ensure @@ -526,37 +638,43 @@ S3 s3; // Interesting cases that should not cause errors. struct S should not error // while struct T should error at the access specifier mismatch at the end. namespace AllDecls { -#define CREATE_ALL_DECL_STRUCT(NAME, ACCESS) \ - typedef int INT; \ - struct NAME { \ - public: \ - private: \ - protected: \ - static_assert(1 == 1, "Message"); \ - static_assert(2 == 2); \ - \ - int x; \ - double y; \ - \ - INT z; \ - \ - unsigned a : 1; \ - unsigned b : 2 * 2 + 5 / 2; \ - \ - mutable int c = sizeof(x + y); \ - \ - void method() {} \ - static void static_method() {} \ - virtual void virtual_method() {} \ - virtual void pure_virtual_method() = 0; \ - inline void inline_method() {} \ - void volatile_method() volatile {} \ - void const_method() const {} \ - \ - typedef int typedef_int; \ - using using_int = int; \ - \ - ACCESS: \ +#define CREATE_ALL_DECL_STRUCT(NAME, ACCESS) \ + typedef int INT; \ + struct NAME { \ + public: \ + private: \ + protected: \ + static_assert(1 == 1, "Message"); \ + static_assert(2 == 2); \ + \ + int x; \ + double y; \ + \ + INT z; \ + \ + unsigned a : 1; \ + unsigned b : 2 * 2 + 5 / 2; \ + \ + mutable int c = sizeof(x + y); \ + \ + void method() {} \ + static void static_method() {} \ + virtual void virtual_method() {} \ + virtual void pure_virtual_method() = 0; \ + inline void inline_method() {} \ + void volatile_method() volatile {} \ + void const_method() const {} \ + \ + typedef int typedef_int; \ + using using_int = int; \ + \ + void method_one_arg(int x) {} \ + void method_one_arg_default_argument(int x = 5 + 5) {} \ + void method_decayed_type(int x[5]) {} \ + \ + int constant_arr[5]; \ + \ + ACCESS: \ }; #if defined(FIRST) @@ -933,6 +1051,34 @@ Alpha::Alpha() {} #endif } +namespace ParameterTest { +#if defined(FIRST) +class X {}; +template +class S { + public: + typedef G Type; + static inline G *Foo(const G *a, int * = nullptr); +}; + +template +G* S::Foo(const G* aaaa, int*) {} +#elif defined(SECOND) +template +class S { + public: + typedef G Type; + static inline G *Foo(const G *a, int * = nullptr); +}; + +template +G* S::Foo(const G* asdf, int*) {} +#else +S s; +#endif +} + + // Keep macros contained to one file. 
#ifdef FIRST #undef FIRST diff --git a/test/OpenMP/target_ast_print.cpp b/test/OpenMP/target_ast_print.cpp index 032a39fe5e82d..d004f738fc0d3 100644 --- a/test/OpenMP/target_ast_print.cpp +++ b/test/OpenMP/target_ast_print.cpp @@ -10,7 +10,7 @@ void foo() {} template T tmain(T argc, T *argv) { - T i, j, a[20]; + T i, j, a[20], always; #pragma omp target foo(); #pragma omp target if (target:argc > 0) @@ -25,6 +25,12 @@ T tmain(T argc, T *argv) { foo(); #pragma omp target map(always,alloc: i) foo(); +#pragma omp target map(always from: i) + foo(); +#pragma omp target map(always) + {always++;} +#pragma omp target map(always,i) + {always++;i++;} #pragma omp target nowait foo(); #pragma omp target depend(in : argc, argv[i:argc], a[:]) @@ -50,6 +56,17 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: foo() // CHECK-NEXT: #pragma omp target map(always,alloc: i) // CHECK-NEXT: foo() +// CHECK-NEXT: #pragma omp target map(always,from: i) +// CHECK-NEXT: foo() +// CHECK-NEXT: #pragma omp target map(tofrom: always) +// CHECK-NEXT: { +// CHECK-NEXT: always++; +// CHECK-NEXT: } +// CHECK-NEXT: #pragma omp target map(tofrom: always,i) +// CHECK-NEXT: { +// CHECK-NEXT: always++; +// CHECK-NEXT: i++; +// CHECK-NEXT: } // CHECK-NEXT: #pragma omp target nowait // CHECK-NEXT: foo() // CHECK-NEXT: #pragma omp target depend(in : argc,argv[i:argc],a[:]) @@ -72,6 +89,17 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: foo() // CHECK-NEXT: #pragma omp target map(always,alloc: i) // CHECK-NEXT: foo() +// CHECK-NEXT: #pragma omp target map(always,from: i) +// CHECK-NEXT: foo() +// CHECK-NEXT: #pragma omp target map(tofrom: always) +// CHECK-NEXT: { +// CHECK-NEXT: always++; +// CHECK-NEXT: } +// CHECK-NEXT: #pragma omp target map(tofrom: always,i) +// CHECK-NEXT: { +// CHECK-NEXT: always++; +// CHECK-NEXT: i++; +// CHECK-NEXT: } // CHECK-NEXT: #pragma omp target nowait // CHECK-NEXT: foo() // CHECK-NEXT: #pragma omp target depend(in : argc,argv[i:argc],a[:]) @@ -94,6 +122,17 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: foo() // CHECK-NEXT: #pragma omp target map(always,alloc: i) // CHECK-NEXT: foo() +// CHECK-NEXT: #pragma omp target map(always,from: i) +// CHECK-NEXT: foo() +// CHECK-NEXT: #pragma omp target map(tofrom: always) +// CHECK-NEXT: { +// CHECK-NEXT: always++; +// CHECK-NEXT: } +// CHECK-NEXT: #pragma omp target map(tofrom: always,i) +// CHECK-NEXT: { +// CHECK-NEXT: always++; +// CHECK-NEXT: i++; +// CHECK-NEXT: } // CHECK-NEXT: #pragma omp target nowait // CHECK-NEXT: foo() // CHECK-NEXT: #pragma omp target depend(in : argc,argv[i:argc],a[:]) @@ -103,7 +142,7 @@ T tmain(T argc, T *argv) { // CHECK-LABEL: int main(int argc, char **argv) { int main (int argc, char **argv) { - int i, j, a[20]; + int i, j, a[20], always; // CHECK-NEXT: int i, j, a[20] #pragma omp target // CHECK-NEXT: #pragma omp target @@ -139,6 +178,26 @@ int main (int argc, char **argv) { foo(); // CHECK-NEXT: foo(); +#pragma omp target map(always from: i) +// CHECK-NEXT: #pragma omp target map(always,from: i) + foo(); +// CHECK-NEXT: foo(); + +#pragma omp target map(always) +// CHECK-NEXT: #pragma omp target map(tofrom: always) + {always++;} +// CHECK-NEXT: { +// CHECK-NEXT: always++; +// CHECK-NEXT: } + +#pragma omp target map(always,i) +// CHECK-NEXT: #pragma omp target map(tofrom: always,i) + {always++;i++;} +// CHECK-NEXT: { +// CHECK-NEXT: always++; +// CHECK-NEXT: i++; +// CHECK-NEXT: } + #pragma omp target nowait // CHECK-NEXT: #pragma omp target nowait foo(); diff --git a/test/OpenMP/target_map_messages.cpp 
b/test/OpenMP/target_map_messages.cpp index 93f0216dd2331..9d166ae9a1d97 100644 --- a/test/OpenMP/target_map_messages.cpp +++ b/test/OpenMP/target_map_messages.cpp @@ -66,6 +66,8 @@ struct SA { {} #pragma omp target map(always, tofrom: c,f[:]) // expected-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}} {} + #pragma omp target map(always) // expected-error {{use of undeclared identifier 'always'}} + {} return; } }; diff --git a/test/Sema/varargs.c b/test/Sema/varargs.c index 25a5c72c42e3c..0ade0cf0aaf9a 100644 --- a/test/Sema/varargs.c +++ b/test/Sema/varargs.c @@ -27,7 +27,7 @@ void f3(float a, ...) { // expected-note 2{{parameter of type 'float' is declare } -// stdarg: PR3075 +// stdarg: PR3075 and PR2531 void f4(const char *msg, ...) { __builtin_va_list ap; __builtin_stdarg_start((ap), (msg)); diff --git a/test/SemaCXX/constexpr-array-unknown-bound.cpp b/test/SemaCXX/constexpr-array-unknown-bound.cpp deleted file mode 100644 index 1d14623719840..0000000000000 --- a/test/SemaCXX/constexpr-array-unknown-bound.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// RUN: %clang_cc1 %s -Wno-uninitialized -std=c++1z -fsyntax-only -verify - -const extern int arr[]; -constexpr auto p = arr; // ok -constexpr int f(int i) {return p[i];} // expected-note {{read of dereferenced one-past-the-end pointer}} - -constexpr int arr[] {1, 2, 3}; -constexpr auto p2 = arr + 2; // ok -constexpr int x = f(2); // ok -constexpr int y = f(3); // expected-error {{constant expression}} -// expected-note-re@-1 {{in call to 'f({{.*}})'}} - -struct A {int m[];} a; -constexpr auto p3 = a.m; // ok -constexpr auto p4 = a.m + 1; // expected-error {{constant expression}} expected-note {{constant bound}} - -void g(int i) { - int arr[i]; - constexpr auto *p = arr + 2; // expected-error {{constant expression}} expected-note {{constant bound}} - - // FIXME: Give a better diagnostic here. The issue is that computing - // sizeof(*arr2) within the array indexing fails due to the VLA. 
- int arr2[2][i]; - constexpr int m = ((void)arr2[2], 0); // expected-error {{constant expression}} -} diff --git a/test/SemaCXX/cxx1z-lambda-star-this.cpp b/test/SemaCXX/cxx1z-lambda-star-this.cpp index a84e653f5c837..5a16f2026ff26 100644 --- a/test/SemaCXX/cxx1z-lambda-star-this.cpp +++ b/test/SemaCXX/cxx1z-lambda-star-this.cpp @@ -3,229 +3,298 @@ // RUN: %clang_cc1 -std=c++1z -verify -fsyntax-only -fblocks -fms-extensions %s -DMS_EXTENSIONS // RUN: %clang_cc1 -std=c++1z -verify -fsyntax-only -fblocks -fdelayed-template-parsing -fms-extensions %s -DMS_EXTENSIONS -DDELAYED_TEMPLATE_PARSING -template constexpr bool is_same = false; -template constexpr bool is_same = true; +template +constexpr bool is_same = false; +template +constexpr bool is_same = true; namespace test_star_this { namespace ns1 { class A { int x = 345; auto foo() { - (void) [*this, this] { }; //expected-error{{'this' can appear only once}} - (void) [this] { ++x; }; - (void) [*this] { ++x; }; //expected-error{{read-only variable}} - (void) [*this] () mutable { ++x; }; - (void) [=] { return x; }; - (void) [&, this] { return x; }; - (void) [=, *this] { return x; }; - (void) [&, *this] { return x; }; + (void)[ *this, this ]{}; //expected-error{{'this' can appear only once}} + (void)[this] { ++x; }; + (void)[*this] { ++x; }; //expected-error{{read-only variable}} + (void)[*this]() mutable { ++x; }; + (void)[=] { return x; }; + (void)[&, this ] { return x; }; + (void)[ =, *this ] { return x; }; + (void)[&, *this ] { return x; }; } }; -} // end ns1 +} // namespace ns1 namespace ns2 { - class B { - B(const B&) = delete; //expected-note{{deleted here}} - int *x = (int *) 456; - void foo() { - (void)[this] { return x; }; - (void)[*this] { return x; }; //expected-error{{call to deleted}} - } - }; -} // end ns2 +class B { + B(const B &) = delete; //expected-note{{deleted here}} + int *x = (int *)456; + void foo() { + (void)[this] { return x; }; + (void)[*this] { return x; }; //expected-error{{call to deleted}} + } +}; +} // namespace ns2 + namespace ns3 { - class B { - B(const B&) = delete; //expected-note2{{deleted here}} - - int *x = (int *) 456; - public: - template - void foo() { - (void)[this] { return x; }; - (void)[*this] { return x; }; //expected-error2{{call to deleted}} - } - - B() = default; - } b; - B *c = (b.foo(), nullptr); //expected-note{{in instantiation}} -} // end ns3 +class B { + B(const B &) = delete; //expected-note2{{deleted here}} + + int *x = (int *)456; + +public: + template + void foo() { + (void)[this] { return x; }; + (void)[*this] { return x; }; //expected-error2{{call to deleted}} + } + + B() = default; +} b; +B *c = (b.foo(), nullptr); //expected-note{{in instantiation}} +} // namespace ns3 namespace ns4 { -template +template class B { - B(const B&) = delete; //expected-note{{deleted here}} + B(const B &) = delete; //expected-note{{deleted here}} double d = 3.14; - public: - template + +public: + template auto foo() { - const auto &L = [*this] (auto a) mutable { //expected-error{{call to deleted}} - d += a; - return [this] (auto b) { return d +=b; }; - }; + const auto &L = [*this](auto a) mutable { //expected-error{{call to deleted}} + d += a; + return [this](auto b) { return d += b; }; + }; } - + B() = default; }; void main() { - B b; + B b; b.foo(); //expected-note{{in instantiation}} -} // end main -} // end ns4 +} // end main +} // namespace ns4 + namespace ns5 { struct X { double d = 3.14; - X(const volatile X&); + X(const volatile X &); void foo() { - } - + void foo() const { 
//expected-note{{const}} - - auto L = [*this] () mutable { - static_assert(is_same); + + auto L = [*this]() mutable { + static_assert(is_same); ++d; - auto M = [this] { - static_assert(is_same); + auto M = [this] { + static_assert(is_same); ++d; auto N = [] { - static_assert(is_same); + static_assert(is_same); }; }; }; - - auto L1 = [*this] { - static_assert(is_same); - auto M = [this] () mutable { - static_assert(is_same); + + auto L1 = [*this] { + static_assert(is_same); + auto M = [this]() mutable { + static_assert(is_same); auto N = [] { - static_assert(is_same); + static_assert(is_same); }; }; - auto M2 = [*this] () mutable { - static_assert(is_same); + auto M2 = [*this]() mutable { + static_assert(is_same); auto N = [] { - static_assert(is_same); + static_assert(is_same); }; }; }; - - auto GL1 = [*this] (auto a) { - static_assert(is_same); - auto M = [this] (auto b) mutable { - static_assert(is_same); - auto N = [] (auto c) { - static_assert(is_same); + + auto GL1 = [*this](auto a) { + static_assert(is_same); + auto M = [this](auto b) mutable { + static_assert(is_same); + auto N = [](auto c) { + static_assert(is_same); }; return N; }; - - auto M2 = [*this] (auto a) mutable { - static_assert(is_same); - auto N = [] (auto b) { - static_assert(is_same); + + auto M2 = [*this](auto a) mutable { + static_assert(is_same); + auto N = [](auto b) { + static_assert(is_same); }; return N; }; return [=](auto a) mutable { M(a)(a); M2(a)(a); }; }; - - GL1("abc")("abc"); - - - auto L2 = [this] () mutable { - static_assert(is_same); + + GL1("abc") + ("abc"); + + auto L2 = [this]() mutable { + static_assert(is_same); ++d; //expected-error{{cannot assign}} }; - auto GL = [*this] (auto a) mutable { - static_assert(is_same); + auto GL = [*this](auto a) mutable { + static_assert(is_same); ++d; - auto M = [this] (auto b) { - static_assert(is_same); + auto M = [this](auto b) { + static_assert(is_same); ++d; - auto N = [] (auto c) { - static_assert(is_same); + auto N = [](auto c) { + static_assert(is_same); }; N(3.14); }; M("abc"); }; GL(3.14); - } void foo() volatile const { - auto L = [this] () { - static_assert(is_same); - auto M = [*this] () mutable { - static_assert(is_same); + auto L = [this]() { + static_assert(is_same); + auto M = [*this]() mutable { + static_assert(is_same); auto N = [this] { - static_assert(is_same); + static_assert(is_same); auto M = [] { - static_assert(is_same); + static_assert(is_same); }; }; auto N2 = [*this] { - static_assert(is_same); + static_assert(is_same); }; }; - auto M2 = [*this] () { - static_assert(is_same); + auto M2 = [*this]() { + static_assert(is_same); auto N = [this] { - static_assert(is_same); + static_assert(is_same); }; }; }; } - }; -} //end ns5 +} // namespace ns5 namespace ns6 { struct X { double d; auto foo() const { - auto L = [*this] () mutable { - auto M = [=] (auto a) { + auto L = [*this]() mutable { + auto M = [=](auto a) { auto N = [this] { ++d; - static_assert(is_same); + static_assert(is_same); auto O = [*this] { - static_assert(is_same); + static_assert(is_same); }; }; N(); - static_assert(is_same); + static_assert(is_same); }; return M; }; return L; } -}; +}; int main() { auto L = X{}.foo(); auto M = L(); M(3.14); } -} // end ns6 +} // namespace ns6 namespace ns7 { struct X { double d; X(); - X(const X&); - X(X&) = delete; + X(const X &); + X(X &) = delete; auto foo() const { //OK - the object used to initialize our capture is a const object and so prefers the non-deleted ctor. 
- const auto &&L = [*this] { }; + const auto &&L = [*this]{}; } - -}; +}; int main() { X x; x.foo(); } -} // end ns7 +} // namespace ns7 + +} // namespace test_star_this + +namespace PR32831 { +// https://bugs.llvm.org/show_bug.cgi?id=32831 +namespace ns1 { +template +void fun_template(Func func) { + (void)[&]() { + func(0); + }; +} + +class A { + void member_foo() { + (void)[this] { + (void)[this] { + fun_template( + [this](auto X) { + auto L = [this](auto Y) { member_foo(); }; + L(5); + }); + fun_template( + [this](auto) { member_foo(); }); + }; + }; + } +}; +} // namespace ns1 + +namespace ns2 { + +struct B { + int data = 0; + template + void mem2(F f) { + (void)[&](auto f) { + (void)[&] { f(this->data); }; + } + (f); + } +}; + +class A { + void member_foo() { + (void)[this] { + (void)[this] { + B{}.mem2( + [this](auto X) { + auto L = [this](auto Y) { member_foo(); }; + L(5); + }); + B{}.mem2( + [this](auto) { member_foo(); }); + }; + }; + } + int data = 0; + auto m2() { + return [this] { return [] () -> decltype(data){ return 0; }; }; + } + auto m3() { + return [] { return [] () -> decltype(data){ return 0; }; }; + } +}; + +} // namespace ns2 -} //end ns test_star_this +} // namespace PR32831 diff --git a/test/SemaCXX/warn-thread-safety-parsing.cpp b/test/SemaCXX/warn-thread-safety-parsing.cpp index b43e24a897bfd..ae32bfe9c913e 100644 --- a/test/SemaCXX/warn-thread-safety-parsing.cpp +++ b/test/SemaCXX/warn-thread-safety-parsing.cpp @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -fsyntax-only -verify -Wthread-safety %s // RUN: %clang_cc1 -fsyntax-only -verify -Wthread-safety -std=c++98 %s -// RUN: %clang_cc1 -fsyntax-only -verify -Wthread-safety -std=c++11 %s +// RUN: %clang_cc1 -fsyntax-only -verify -Wthread-safety -std=c++11 %s -D CPP11 #define LOCKABLE __attribute__ ((lockable)) #define SCOPED_LOCKABLE __attribute__ ((scoped_lockable)) @@ -1513,3 +1513,15 @@ public: } // end namespace FunctionAttributesInsideClass_ICE_Test + +#ifdef CPP11 +namespace CRASH_POST_R301735 { + class SomeClass { + public: + void foo() { + auto l = [this] { auto l = [] () EXCLUSIVE_LOCKS_REQUIRED(mu_) {}; }; + } + Mutex mu_; + }; +} +#endif \ No newline at end of file diff --git a/test/SemaObjCXX/arc-overloading.mm b/test/SemaObjCXX/arc-overloading.mm index a74941721172b..3ac9c51293b73 100644 --- a/test/SemaObjCXX/arc-overloading.mm +++ b/test/SemaObjCXX/arc-overloading.mm @@ -199,4 +199,4 @@ class rdar10142572 { }; id rdar10142572::f() { return 0; } // okay: merged down -id __attribute__((ns_returns_retained)) rdar10142572::g() { return 0; } // expected-error{{function declared with the ns_returns_retained attribute was previously declared without the ns_returns_retained attribute}} +id __attribute__((ns_returns_retained)) rdar10142572::g() { return 0; } // expected-error{{function declared with 'ns_returns_retained' attribute was previously declared without the 'ns_returns_retained' attribute}} diff --git a/tools/libclang/CIndex.cpp b/tools/libclang/CIndex.cpp index 86f1047dee938..c251d83e20976 100644 --- a/tools/libclang/CIndex.cpp +++ b/tools/libclang/CIndex.cpp @@ -3313,7 +3313,7 @@ clang_parseTranslationUnit_Impl(CXIndex CIdx, const char *source_filename, Diags(CompilerInstance::createDiagnostics(new DiagnosticOptions)); if (options & CXTranslationUnit_KeepGoing) - Diags->setFatalsAsError(true); + Diags->setSuppressAfterFatalError(false); // Recover resources if we crash before exiting this function. llvm::CrashRecoveryContextCleanupRegistrar