diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-28 21:23:03 +0000 | 
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-28 21:23:03 +0000 | 
| commit | 55e6d896ad333f07bb3b1ba487df214fc268a4ab (patch) | |
| tree | 9ac2087dfbe8507c56dd39d17cad42836448829f /lib | |
| parent | 1de93ee5610e8a97e753c881c574f8d994e71373 (diff) | |
Diffstat (limited to 'lib')
40 files changed, 2829 insertions, 206 deletions
diff --git a/lib/AST/ASTImporter.cpp b/lib/AST/ASTImporter.cpp index 0e627f9737ce..0d1d9807549f 100644 --- a/lib/AST/ASTImporter.cpp +++ b/lib/AST/ASTImporter.cpp @@ -134,12 +134,17 @@ namespace clang {      bool ImportTemplateArguments(const TemplateArgument *FromArgs,                                   unsigned NumFromArgs,                                 SmallVectorImpl<TemplateArgument> &ToArgs); +    template <typename InContainerTy> +    bool ImportTemplateArgumentListInfo(const InContainerTy &Container, +                                        TemplateArgumentListInfo &ToTAInfo);      bool IsStructuralMatch(RecordDecl *FromRecord, RecordDecl *ToRecord,                             bool Complain = true);      bool IsStructuralMatch(VarDecl *FromVar, VarDecl *ToVar,                             bool Complain = true);      bool IsStructuralMatch(EnumDecl *FromEnum, EnumDecl *ToRecord);      bool IsStructuralMatch(EnumConstantDecl *FromEC, EnumConstantDecl *ToEC); +    bool IsStructuralMatch(FunctionTemplateDecl *From, +                           FunctionTemplateDecl *To);      bool IsStructuralMatch(ClassTemplateDecl *From, ClassTemplateDecl *To);      bool IsStructuralMatch(VarTemplateDecl *From, VarTemplateDecl *To);      Decl *VisitDecl(Decl *D); @@ -195,6 +200,7 @@ namespace clang {                                              ClassTemplateSpecializationDecl *D);      Decl *VisitVarTemplateDecl(VarTemplateDecl *D);      Decl *VisitVarTemplateSpecializationDecl(VarTemplateSpecializationDecl *D); +    Decl *VisitFunctionTemplateDecl(FunctionTemplateDecl *D);      // Importing statements      DeclGroupRef ImportDeclGroup(DeclGroupRef DG); @@ -280,6 +286,7 @@ namespace clang {      Expr *VisitCXXDeleteExpr(CXXDeleteExpr *E);      Expr *VisitCXXConstructExpr(CXXConstructExpr *E);      Expr *VisitCXXMemberCallExpr(CXXMemberCallExpr *E); +    Expr *VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr *E);      Expr *VisitExprWithCleanups(ExprWithCleanups *EWC);      Expr *VisitCXXThisExpr(CXXThisExpr *E);      Expr *VisitCXXBoolLiteralExpr(CXXBoolLiteralExpr *E); @@ -1247,6 +1254,18 @@ bool ASTNodeImporter::ImportTemplateArguments(const TemplateArgument *FromArgs,    return false;  } +template <typename InContainerTy> +bool ASTNodeImporter::ImportTemplateArgumentListInfo( +    const InContainerTy &Container, TemplateArgumentListInfo &ToTAInfo) { +  for (const auto &FromLoc : Container) { +    if (auto ToLoc = ImportTemplateArgumentLoc(FromLoc)) +      ToTAInfo.addArgument(*ToLoc); +    else +      return true; +  } +  return false; +} +  bool ASTNodeImporter::IsStructuralMatch(RecordDecl *FromRecord,                                           RecordDecl *ToRecord, bool Complain) {    // Eliminate a potential failure point where we attempt to re-import @@ -1280,6 +1299,14 @@ bool ASTNodeImporter::IsStructuralMatch(EnumDecl *FromEnum, EnumDecl *ToEnum) {    return Ctx.IsStructurallyEquivalent(FromEnum, ToEnum);  } +bool ASTNodeImporter::IsStructuralMatch(FunctionTemplateDecl *From, +                                        FunctionTemplateDecl *To) { +  StructuralEquivalenceContext Ctx( +      Importer.getFromContext(), Importer.getToContext(), +      Importer.getNonEquivalentDecls(), false, false); +  return Ctx.IsStructurallyEquivalent(From, To); +} +  bool ASTNodeImporter::IsStructuralMatch(EnumConstantDecl *FromEC,                                          EnumConstantDecl *ToEC)  { @@ -4197,6 +4224,64 @@ Decl *ASTNodeImporter::VisitVarTemplateSpecializationDecl(    return D2;  } +Decl *ASTNodeImporter::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) { +  DeclContext *DC, *LexicalDC; +  DeclarationName Name; +  SourceLocation Loc; +  NamedDecl *ToD; + +  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc)) +    return nullptr; + +  if (ToD) +    return ToD; + +  // Try to find a function in our own ("to") context with the same name, same +  // type, and in the same context as the function we're importing. +  if (!LexicalDC->isFunctionOrMethod()) { +    unsigned IDNS = Decl::IDNS_Ordinary; +    SmallVector<NamedDecl *, 2> FoundDecls; +    DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls); +    for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) { +      if (!FoundDecls[I]->isInIdentifierNamespace(IDNS)) +        continue; + +      if (FunctionTemplateDecl *FoundFunction = +              dyn_cast<FunctionTemplateDecl>(FoundDecls[I])) { +        if (FoundFunction->hasExternalFormalLinkage() && +            D->hasExternalFormalLinkage()) { +          if (IsStructuralMatch(D, FoundFunction)) { +            Importer.Imported(D, FoundFunction); +            // FIXME: Actually try to merge the body and other attributes. +            return FoundFunction; +          } +        } +      } +    } +  } + +  TemplateParameterList *Params = +      ImportTemplateParameterList(D->getTemplateParameters()); +  if (!Params) +    return nullptr; + +  FunctionDecl *TemplatedFD = +      cast_or_null<FunctionDecl>(Importer.Import(D->getTemplatedDecl())); +  if (!TemplatedFD) +    return nullptr; + +  FunctionTemplateDecl *ToFunc = FunctionTemplateDecl::Create( +      Importer.getToContext(), DC, Loc, Name, Params, TemplatedFD); + +  TemplatedFD->setDescribedFunctionTemplate(ToFunc); +  ToFunc->setAccess(D->getAccess()); +  ToFunc->setLexicalDeclContext(LexicalDC); +  Importer.Imported(D, ToFunc); + +  LexicalDC->addDeclInternal(ToFunc); +  return ToFunc; +} +  //----------------------------------------------------------------------------  // Import Statements  //---------------------------------------------------------------------------- @@ -4321,9 +4406,8 @@ Stmt *ASTNodeImporter::VisitCompoundStmt(CompoundStmt *S) {    SourceLocation ToLBraceLoc = Importer.Import(S->getLBracLoc());    SourceLocation ToRBraceLoc = Importer.Import(S->getRBracLoc()); -  return new (Importer.getToContext()) CompoundStmt(Importer.getToContext(), -                                                    ToStmts, -                                                    ToLBraceLoc, ToRBraceLoc); +  return CompoundStmt::Create(Importer.getToContext(), ToStmts, ToLBraceLoc, +                              ToRBraceLoc);  }  Stmt *ASTNodeImporter::VisitCaseStmt(CaseStmt *S) { @@ -5759,6 +5843,47 @@ Expr *ASTNodeImporter::VisitCXXPseudoDestructorExpr(          Importer.Import(E->getTildeLoc()), Storage);  } +Expr *ASTNodeImporter::VisitCXXDependentScopeMemberExpr( +    CXXDependentScopeMemberExpr *E) { +  Expr *Base = nullptr; +  if (!E->isImplicitAccess()) { +    Base = Importer.Import(E->getBase()); +    if (!Base) +      return nullptr; +  } + +  QualType BaseType = Importer.Import(E->getBaseType()); +  if (BaseType.isNull()) +    return nullptr; + +  TemplateArgumentListInfo ToTAInfo(Importer.Import(E->getLAngleLoc()), +                                    Importer.Import(E->getRAngleLoc())); +  TemplateArgumentListInfo *ResInfo = nullptr; +  if (E->hasExplicitTemplateArgs()) { +    if (ImportTemplateArgumentListInfo(E->template_arguments(), ToTAInfo)) +      return nullptr; +    ResInfo = &ToTAInfo; +  } + +  DeclarationName Name = Importer.Import(E->getMember()); +  if (!E->getMember().isEmpty() && Name.isEmpty()) +    return nullptr; + +  DeclarationNameInfo MemberNameInfo(Name, Importer.Import(E->getMemberLoc())); +  // Import additional name location/type info. +  ImportDeclarationNameLoc(E->getMemberNameInfo(), MemberNameInfo); +  auto ToFQ = Importer.Import(E->getFirstQualifierFoundInScope()); +  if (!ToFQ && E->getFirstQualifierFoundInScope()) +    return nullptr; + +  return CXXDependentScopeMemberExpr::Create( +      Importer.getToContext(), Base, BaseType, E->isArrow(), +      Importer.Import(E->getOperatorLoc()), +      Importer.Import(E->getQualifierLoc()), +      Importer.Import(E->getTemplateKeywordLoc()), +      cast_or_null<NamedDecl>(ToFQ), MemberNameInfo, ResInfo); +} +  Expr *ASTNodeImporter::VisitCallExpr(CallExpr *E) {    QualType T = Importer.Import(E->getType());    if (T.isNull()) diff --git a/lib/AST/Decl.cpp b/lib/AST/Decl.cpp index 629037b1755c..4c1d591b41e9 100644 --- a/lib/AST/Decl.cpp +++ b/lib/AST/Decl.cpp @@ -1550,7 +1550,7 @@ void NamedDecl::printQualifiedName(raw_ostream &OS,        // the enum-specifier. Each scoped enumerator is declared in the        // scope of the enumeration.        // For the case of unscoped enumerator, do not include in the qualified -      // name any information about its enum enclosing scope, as is visibility +      // name any information about its enum enclosing scope, as its visibility        // is global.        if (ED->isScoped())          OS << *ED; diff --git a/lib/AST/Stmt.cpp b/lib/AST/Stmt.cpp index 8d240c1336ab..982fd458493f 100644 --- a/lib/AST/Stmt.cpp +++ b/lib/AST/Stmt.cpp @@ -299,31 +299,34 @@ SourceLocation Stmt::getLocEnd() const {    llvm_unreachable("unknown statement kind");  } -CompoundStmt::CompoundStmt(const ASTContext &C, ArrayRef<Stmt*> Stmts, -                           SourceLocation LB, SourceLocation RB) -  : Stmt(CompoundStmtClass), LBraceLoc(LB), RBraceLoc(RB) { +CompoundStmt::CompoundStmt(ArrayRef<Stmt *> Stmts, SourceLocation LB, +                           SourceLocation RB) +    : Stmt(CompoundStmtClass), LBraceLoc(LB), RBraceLoc(RB) {    CompoundStmtBits.NumStmts = Stmts.size(); +  setStmts(Stmts); +} + +void CompoundStmt::setStmts(ArrayRef<Stmt *> Stmts) {    assert(CompoundStmtBits.NumStmts == Stmts.size() &&           "NumStmts doesn't fit in bits of CompoundStmtBits.NumStmts!"); -  if (Stmts.empty()) { -    Body = nullptr; -    return; -  } - -  Body = new (C) Stmt*[Stmts.size()]; -  std::copy(Stmts.begin(), Stmts.end(), Body); +  std::copy(Stmts.begin(), Stmts.end(), body_begin());  } -void CompoundStmt::setStmts(const ASTContext &C, ArrayRef<Stmt *> Stmts) { -  if (Body) -    C.Deallocate(Body); -  CompoundStmtBits.NumStmts = Stmts.size(); -  assert(CompoundStmtBits.NumStmts == Stmts.size() && -         "NumStmts doesn't fit in bits of CompoundStmtBits.NumStmts!"); +CompoundStmt *CompoundStmt::Create(const ASTContext &C, ArrayRef<Stmt *> Stmts, +                                   SourceLocation LB, SourceLocation RB) { +  void *Mem = +      C.Allocate(totalSizeToAlloc<Stmt *>(Stmts.size()), alignof(CompoundStmt)); +  return new (Mem) CompoundStmt(Stmts, LB, RB); +} -  Body = new (C) Stmt*[Stmts.size()]; -  std::copy(Stmts.begin(), Stmts.end(), Body); +CompoundStmt *CompoundStmt::CreateEmpty(const ASTContext &C, +                                        unsigned NumStmts) { +  void *Mem = +      C.Allocate(totalSizeToAlloc<Stmt *>(NumStmts), alignof(CompoundStmt)); +  CompoundStmt *New = new (Mem) CompoundStmt(EmptyShell()); +  New->CompoundStmtBits.NumStmts = NumStmts; +  return New;  }  const char *LabelStmt::getName() const { @@ -334,7 +337,7 @@ AttributedStmt *AttributedStmt::Create(const ASTContext &C, SourceLocation Loc,                                         ArrayRef<const Attr*> Attrs,                                         Stmt *SubStmt) {    assert(!Attrs.empty() && "Attrs should not be empty"); -  void *Mem = C.Allocate(sizeof(AttributedStmt) + sizeof(Attr *) * Attrs.size(), +  void *Mem = C.Allocate(totalSizeToAlloc<const Attr *>(Attrs.size()),                           alignof(AttributedStmt));    return new (Mem) AttributedStmt(Loc, Attrs, SubStmt);  } @@ -342,7 +345,7 @@ AttributedStmt *AttributedStmt::Create(const ASTContext &C, SourceLocation Loc,  AttributedStmt *AttributedStmt::CreateEmpty(const ASTContext &C,                                              unsigned NumAttrs) {    assert(NumAttrs > 0 && "NumAttrs should be greater than zero"); -  void *Mem = C.Allocate(sizeof(AttributedStmt) + sizeof(Attr *) * NumAttrs, +  void *Mem = C.Allocate(totalSizeToAlloc<const Attr *>(NumAttrs),                           alignof(AttributedStmt));    return new (Mem) AttributedStmt(EmptyShell(), NumAttrs);  } diff --git a/lib/Analysis/BodyFarm.cpp b/lib/Analysis/BodyFarm.cpp index e5d3c5ce5bc2..89ca8484819d 100644 --- a/lib/Analysis/BodyFarm.cpp +++ b/lib/Analysis/BodyFarm.cpp @@ -133,7 +133,7 @@ BinaryOperator *ASTMaker::makeComparison(const Expr *LHS, const Expr *RHS,  }  CompoundStmt *ASTMaker::makeCompound(ArrayRef<Stmt *> Stmts) { -  return new (C) CompoundStmt(C, Stmts, SourceLocation(), SourceLocation()); +  return CompoundStmt::Create(C, Stmts, SourceLocation(), SourceLocation());  }  DeclRefExpr *ASTMaker::makeDeclRefExpr( diff --git a/lib/Basic/Targets/AArch64.cpp b/lib/Basic/Targets/AArch64.cpp index 6080cefac744..4d3cd121f705 100644 --- a/lib/Basic/Targets/AArch64.cpp +++ b/lib/Basic/Targets/AArch64.cpp @@ -159,7 +159,7 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,      Builder.defineMacro("__ARM_FP_FAST", "1");    Builder.defineMacro("__ARM_SIZEOF_WCHAR_T", -                      llvm::utostr(Opts.WCharSize ? Opts.WCharSize : 4)); +                      Twine(Opts.WCharSize ? Opts.WCharSize : 4));    Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM", Opts.ShortEnums ? "1" : "4"); diff --git a/lib/Basic/Targets/ARM.cpp b/lib/Basic/Targets/ARM.cpp index fe261b774855..6fb0ab41ff5b 100644 --- a/lib/Basic/Targets/ARM.cpp +++ b/lib/Basic/Targets/ARM.cpp @@ -582,7 +582,7 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts,    // ACLE 6.4.4 LDREX/STREX    if (LDREX) -    Builder.defineMacro("__ARM_FEATURE_LDREX", "0x" + llvm::utohexstr(LDREX)); +    Builder.defineMacro("__ARM_FEATURE_LDREX", "0x" + Twine::utohexstr(LDREX));    // ACLE 6.4.5 CLZ    if (ArchVersion == 5 || (ArchVersion == 6 && CPUProfile != "M") || @@ -591,7 +591,7 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts,    // ACLE 6.5.1 Hardware Floating Point    if (HW_FP) -    Builder.defineMacro("__ARM_FP", "0x" + llvm::utohexstr(HW_FP)); +    Builder.defineMacro("__ARM_FP", "0x" + Twine::utohexstr(HW_FP));    // ACLE predefines.    Builder.defineMacro("__ARM_ACLE", "200"); @@ -672,11 +672,11 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts,      // current AArch32 NEON implementations do not support double-precision      // floating-point even when it is present in VFP.      Builder.defineMacro("__ARM_NEON_FP", -                        "0x" + llvm::utohexstr(HW_FP & ~HW_FP_DP)); +                        "0x" + Twine::utohexstr(HW_FP & ~HW_FP_DP));    }    Builder.defineMacro("__ARM_SIZEOF_WCHAR_T", -                      llvm::utostr(Opts.WCharSize ? Opts.WCharSize : 4)); +                      Twine(Opts.WCharSize ? Opts.WCharSize : 4));    Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM", Opts.ShortEnums ? "1" : "4"); diff --git a/lib/Basic/Targets/X86.cpp b/lib/Basic/Targets/X86.cpp index bdf5cdb9407b..3efba26a8373 100644 --- a/lib/Basic/Targets/X86.cpp +++ b/lib/Basic/Targets/X86.cpp @@ -132,7 +132,14 @@ bool X86TargetInfo::initFeatureMap(      break;    case CK_Icelake: -    // TODO: Add icelake features here. +    setFeatureEnabledImpl(Features, "vaes", true); +    setFeatureEnabledImpl(Features, "gfni", true); +    setFeatureEnabledImpl(Features, "vpclmulqdq", true); +    setFeatureEnabledImpl(Features, "avx512bitalg", true); +    setFeatureEnabledImpl(Features, "avx512vnni", true); +    setFeatureEnabledImpl(Features, "avx512vbmi2", true); +    setFeatureEnabledImpl(Features, "avx512vpopcntdq", true); +    setFeatureEnabledImpl(Features, "clwb", true);      LLVM_FALLTHROUGH;    case CK_Cannonlake:      setFeatureEnabledImpl(Features, "avx512ifma", true); @@ -145,8 +152,10 @@ bool X86TargetInfo::initFeatureMap(      setFeatureEnabledImpl(Features, "avx512dq", true);      setFeatureEnabledImpl(Features, "avx512bw", true);      setFeatureEnabledImpl(Features, "avx512vl", true); -    setFeatureEnabledImpl(Features, "pku", true); -    setFeatureEnabledImpl(Features, "clwb", true); +    if (Kind == CK_SkylakeServer) { +      setFeatureEnabledImpl(Features, "pku", true); +      setFeatureEnabledImpl(Features, "clwb", true); +    }      LLVM_FALLTHROUGH;    case CK_SkylakeClient:      setFeatureEnabledImpl(Features, "xsavec", true); @@ -443,7 +452,7 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,      LLVM_FALLTHROUGH;    case SSE2:      Features["sse2"] = Features["pclmul"] = Features["aes"] = Features["sha"] = -        false; +        Features["gfni"] = false;      LLVM_FALLTHROUGH;    case SSE3:      Features["sse3"] = false; @@ -460,7 +469,7 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,      LLVM_FALLTHROUGH;    case AVX:      Features["fma"] = Features["avx"] = Features["f16c"] = Features["xsave"] = -        Features["xsaveopt"] = false; +        Features["xsaveopt"] = Features["vaes"] = Features["vpclmulqdq"] = false;      setXOPLevel(Features, FMA4, false);      LLVM_FALLTHROUGH;    case AVX2: @@ -470,7 +479,9 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,      Features["avx512f"] = Features["avx512cd"] = Features["avx512er"] =          Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] =              Features["avx512vl"] = Features["avx512vbmi"] = -                Features["avx512ifma"] = Features["avx512vpopcntdq"] = false; +                Features["avx512ifma"] = Features["avx512vpopcntdq"] = +                    Features["avx512bitalg"] = Features["avx512vnni"] = +                        Features["avx512vbmi2"] = false;      break;    }  } @@ -572,9 +583,26 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features,    } else if (Name == "aes") {      if (Enabled)        setSSELevel(Features, SSE2, Enabled); +    else +      Features["vaes"] = false; +  } else if (Name == "vaes") { +    if (Enabled) { +      setSSELevel(Features, AVX, Enabled); +      Features["aes"] = true; +    }    } else if (Name == "pclmul") {      if (Enabled)        setSSELevel(Features, SSE2, Enabled); +    else +      Features["vpclmulqdq"] = false; +  } else if (Name == "vpclmulqdq") { +    if (Enabled) { +      setSSELevel(Features, AVX, Enabled); +      Features["pclmul"] = true; +    } +  } else if (Name == "gfni") { +     if (Enabled) +      setSSELevel(Features, SSE2, Enabled);    } else if (Name == "avx") {      setSSELevel(Features, AVX, Enabled);    } else if (Name == "avx2") { @@ -584,15 +612,17 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features,    } else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf" ||               Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" ||               Name == "avx512vbmi" || Name == "avx512ifma" || -             Name == "avx512vpopcntdq") { +             Name == "avx512vpopcntdq" || Name == "avx512bitalg" || +             Name == "avx512vnni" || Name == "avx512vbmi2") {      if (Enabled)        setSSELevel(Features, AVX512F, Enabled); -    // Enable BWI instruction if VBMI is being enabled. -    if (Name == "avx512vbmi" && Enabled) +    // Enable BWI instruction if VBMI/VBMI2/BITALG is being enabled. +    if ((Name.startswith("avx512vbmi") || Name == "avx512bitalg") && Enabled)        Features["avx512bw"] = true; -    // Also disable VBMI if BWI is being disabled. +    // Also disable VBMI/VBMI2/BITALG if BWI is being disabled.      if (Name == "avx512bw" && !Enabled) -      Features["avx512vbmi"] = false; +      Features["avx512vbmi"] = Features["avx512vbmi2"] = +      Features["avx512bitalg"] = false;    } else if (Name == "fma") {      if (Enabled)        setSSELevel(Features, AVX, Enabled); @@ -636,8 +666,12 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,      if (Feature == "+aes") {        HasAES = true; +    } else if (Feature == "+vaes") { +      HasVAES = true;      } else if (Feature == "+pclmul") {        HasPCLMUL = true; +    } else if (Feature == "+vpclmulqdq") { +      HasVPCLMULQDQ = true;      } else if (Feature == "+lzcnt") {        HasLZCNT = true;      } else if (Feature == "+rdrnd") { @@ -666,22 +700,30 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,        HasFMA = true;      } else if (Feature == "+f16c") {        HasF16C = true; +    } else if (Feature == "+gfni") { +      HasGFNI = true;      } else if (Feature == "+avx512cd") {        HasAVX512CD = true;      } else if (Feature == "+avx512vpopcntdq") {        HasAVX512VPOPCNTDQ = true; +    } else if (Feature == "+avx512vnni") { +      HasAVX512VNNI = true;      } else if (Feature == "+avx512er") {        HasAVX512ER = true;      } else if (Feature == "+avx512pf") {        HasAVX512PF = true;      } else if (Feature == "+avx512dq") {        HasAVX512DQ = true; +    } else if (Feature == "+avx512bitalg") { +      HasAVX512BITALG = true;      } else if (Feature == "+avx512bw") {        HasAVX512BW = true;      } else if (Feature == "+avx512vl") {        HasAVX512VL = true;      } else if (Feature == "+avx512vbmi") {        HasAVX512VBMI = true; +    } else if (Feature == "+avx512vbmi2") { +      HasAVX512VBMI2 = true;      } else if (Feature == "+avx512ifma") {        HasAVX512IFMA = true;      } else if (Feature == "+sha") { @@ -934,9 +976,15 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,    if (HasAES)      Builder.defineMacro("__AES__"); +  if (HasVAES) +    Builder.defineMacro("__VAES__"); +    if (HasPCLMUL)      Builder.defineMacro("__PCLMUL__"); +  if (HasVPCLMULQDQ) +    Builder.defineMacro("__VPCLMULQDQ__"); +    if (HasLZCNT)      Builder.defineMacro("__LZCNT__"); @@ -996,22 +1044,31 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,    if (HasF16C)      Builder.defineMacro("__F16C__"); +  if (HasGFNI) +    Builder.defineMacro("__GFNI__"); +    if (HasAVX512CD)      Builder.defineMacro("__AVX512CD__");    if (HasAVX512VPOPCNTDQ)      Builder.defineMacro("__AVX512VPOPCNTDQ__"); +  if (HasAVX512VNNI) +    Builder.defineMacro("__AVX512VNNI__");    if (HasAVX512ER)      Builder.defineMacro("__AVX512ER__");    if (HasAVX512PF)      Builder.defineMacro("__AVX512PF__");    if (HasAVX512DQ)      Builder.defineMacro("__AVX512DQ__"); +  if (HasAVX512BITALG) +    Builder.defineMacro("__AVX512BITALG__");    if (HasAVX512BW)      Builder.defineMacro("__AVX512BW__");    if (HasAVX512VL)      Builder.defineMacro("__AVX512VL__");    if (HasAVX512VBMI)      Builder.defineMacro("__AVX512VBMI__"); +  if (HasAVX512VBMI2) +    Builder.defineMacro("__AVX512VBMI2__");    if (HasAVX512IFMA)      Builder.defineMacro("__AVX512IFMA__"); @@ -1141,12 +1198,15 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {        .Case("avx512f", true)        .Case("avx512cd", true)        .Case("avx512vpopcntdq", true) +      .Case("avx512vnni", true)        .Case("avx512er", true)        .Case("avx512pf", true)        .Case("avx512dq", true) +      .Case("avx512bitalg", true)        .Case("avx512bw", true)        .Case("avx512vl", true)        .Case("avx512vbmi", true) +      .Case("avx512vbmi2", true)        .Case("avx512ifma", true)        .Case("bmi", true)        .Case("bmi2", true) @@ -1159,6 +1219,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {        .Case("fma4", true)        .Case("fsgsbase", true)        .Case("fxsr", true) +      .Case("gfni", true)        .Case("lwp", true)        .Case("lzcnt", true)        .Case("mmx", true) @@ -1185,6 +1246,8 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {        .Case("sse4.2", true)        .Case("sse4a", true)        .Case("tbm", true) +      .Case("vaes", true) +      .Case("vpclmulqdq", true)        .Case("x87", true)        .Case("xop", true)        .Case("xsave", true) @@ -1203,12 +1266,15 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {        .Case("avx512f", SSELevel >= AVX512F)        .Case("avx512cd", HasAVX512CD)        .Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ) +      .Case("avx512vnni", HasAVX512VNNI)        .Case("avx512er", HasAVX512ER)        .Case("avx512pf", HasAVX512PF)        .Case("avx512dq", HasAVX512DQ) +      .Case("avx512bitalg", HasAVX512BITALG)        .Case("avx512bw", HasAVX512BW)        .Case("avx512vl", HasAVX512VL)        .Case("avx512vbmi", HasAVX512VBMI) +      .Case("avx512vbmi2", HasAVX512VBMI2)        .Case("avx512ifma", HasAVX512IFMA)        .Case("bmi", HasBMI)        .Case("bmi2", HasBMI2) @@ -1221,6 +1287,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {        .Case("fma4", XOPLevel >= FMA4)        .Case("fsgsbase", HasFSGSBASE)        .Case("fxsr", HasFXSR) +      .Case("gfni", HasGFNI)        .Case("ibt", HasIBT)        .Case("lwp", HasLWP)        .Case("lzcnt", HasLZCNT) @@ -1249,6 +1316,8 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {        .Case("sse4.2", SSELevel >= SSE42)        .Case("sse4a", XOPLevel >= SSE4A)        .Case("tbm", HasTBM) +      .Case("vaes", HasVAES) +      .Case("vpclmulqdq", HasVPCLMULQDQ)        .Case("x86", true)        .Case("x86_32", getTriple().getArch() == llvm::Triple::x86)        .Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64) diff --git a/lib/Basic/Targets/X86.h b/lib/Basic/Targets/X86.h index b1811593545e..cbd6a2d24fb5 100644 --- a/lib/Basic/Targets/X86.h +++ b/lib/Basic/Targets/X86.h @@ -48,7 +48,10 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {    enum XOPEnum { NoXOP, SSE4A, FMA4, XOP } XOPLevel = NoXOP;    bool HasAES = false; +  bool HasVAES = false;    bool HasPCLMUL = false; +  bool HasVPCLMULQDQ = false; +  bool HasGFNI = false;    bool HasLZCNT = false;    bool HasRDRND = false;    bool HasFSGSBASE = false; @@ -65,12 +68,15 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {    bool HasF16C = false;    bool HasAVX512CD = false;    bool HasAVX512VPOPCNTDQ = false; +  bool HasAVX512VNNI = false;    bool HasAVX512ER = false;    bool HasAVX512PF = false;    bool HasAVX512DQ = false; +  bool HasAVX512BITALG = false;    bool HasAVX512BW = false;    bool HasAVX512VL = false;    bool HasAVX512VBMI = false; +  bool HasAVX512VBMI2 = false;    bool HasAVX512IFMA = false;    bool HasSHA = false;    bool HasMPX = false; diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 609987c4fa4c..ba54f8342f1b 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -8143,12 +8143,18 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,    case X86::BI__builtin_ia32_storesd128_mask: {      return EmitX86MaskedStore(*this, Ops, 16);    } +  case X86::BI__builtin_ia32_vpopcntb_128:    case X86::BI__builtin_ia32_vpopcntd_128:    case X86::BI__builtin_ia32_vpopcntq_128: +  case X86::BI__builtin_ia32_vpopcntw_128: +  case X86::BI__builtin_ia32_vpopcntb_256:    case X86::BI__builtin_ia32_vpopcntd_256:    case X86::BI__builtin_ia32_vpopcntq_256: +  case X86::BI__builtin_ia32_vpopcntw_256: +  case X86::BI__builtin_ia32_vpopcntb_512:    case X86::BI__builtin_ia32_vpopcntd_512: -  case X86::BI__builtin_ia32_vpopcntq_512: { +  case X86::BI__builtin_ia32_vpopcntq_512: +  case X86::BI__builtin_ia32_vpopcntw_512: {      llvm::Type *ResultType = ConvertType(E->getType());      llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);      return Builder.CreateCall(F, Ops); diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index 90eeddf5cc0b..c7dc8337e19e 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -570,7 +570,7 @@ static llvm::Value *emitHash16Bytes(CGBuilderTy &Builder, llvm::Value *Low,  bool CodeGenFunction::isNullPointerAllowed(TypeCheckKind TCK) {    return TCK == TCK_DowncastPointer || TCK == TCK_Upcast || -         TCK == TCK_UpcastToVirtualBase; +         TCK == TCK_UpcastToVirtualBase || TCK == TCK_DynamicOperation;  }  bool CodeGenFunction::isVptrCheckRequired(TypeCheckKind TCK, QualType Ty) { @@ -578,7 +578,7 @@ bool CodeGenFunction::isVptrCheckRequired(TypeCheckKind TCK, QualType Ty) {    return (RD && RD->hasDefinition() && RD->isDynamicClass()) &&           (TCK == TCK_MemberAccess || TCK == TCK_MemberCall ||            TCK == TCK_DowncastPointer || TCK == TCK_DowncastReference || -          TCK == TCK_UpcastToVirtualBase); +          TCK == TCK_UpcastToVirtualBase || TCK == TCK_DynamicOperation);  }  bool CodeGenFunction::sanitizePerformTypeCheck() const { diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp index 0749b0ac46a7..c32f1e5415da 100644 --- a/lib/CodeGen/CGExprCXX.cpp +++ b/lib/CodeGen/CGExprCXX.cpp @@ -2056,6 +2056,15 @@ static llvm::Value *EmitTypeidFromVTable(CodeGenFunction &CGF, const Expr *E,    // Get the vtable pointer.    Address ThisPtr = CGF.EmitLValue(E).getAddress(); +  QualType SrcRecordTy = E->getType(); + +  // C++ [class.cdtor]p4: +  //   If the operand of typeid refers to the object under construction or +  //   destruction and the static type of the operand is neither the constructor +  //   or destructor’s class nor one of its bases, the behavior is undefined. +  CGF.EmitTypeCheck(CodeGenFunction::TCK_DynamicOperation, E->getExprLoc(), +                    ThisPtr.getPointer(), SrcRecordTy); +    // C++ [expr.typeid]p2:    //   If the glvalue expression is obtained by applying the unary * operator to    //   a pointer and the pointer is a null pointer value, the typeid expression @@ -2064,7 +2073,6 @@ static llvm::Value *EmitTypeidFromVTable(CodeGenFunction &CGF, const Expr *E,    // However, this paragraph's intent is not clear.  We choose a very generous    // interpretation which implores us to consider comma operators, conditional    // operators, parentheses and other such constructs. -  QualType SrcRecordTy = E->getType();    if (CGF.CGM.getCXXABI().shouldTypeidBeNullChecked(            isGLValueFromPointerDeref(E), SrcRecordTy)) {      llvm::BasicBlock *BadTypeidBlock = @@ -2127,10 +2135,6 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr,    CGM.EmitExplicitCastExprType(DCE, this);    QualType DestTy = DCE->getTypeAsWritten(); -  if (DCE->isAlwaysNull()) -    if (llvm::Value *T = EmitDynamicCastToNull(*this, DestTy)) -      return T; -    QualType SrcTy = DCE->getSubExpr()->getType();    // C++ [expr.dynamic.cast]p7: @@ -2151,6 +2155,18 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr,      DestRecordTy = DestTy->castAs<ReferenceType>()->getPointeeType();    } +  // C++ [class.cdtor]p5: +  //   If the operand of the dynamic_cast refers to the object under +  //   construction or destruction and the static type of the operand is not a +  //   pointer to or object of the constructor or destructor’s own class or one +  //   of its bases, the dynamic_cast results in undefined behavior. +  EmitTypeCheck(TCK_DynamicOperation, DCE->getExprLoc(), ThisAddr.getPointer(), +                SrcRecordTy); + +  if (DCE->isAlwaysNull()) +    if (llvm::Value *T = EmitDynamicCastToNull(*this, DestTy)) +      return T; +    assert(SrcRecordTy->isRecordType() && "source type must be a record type!");    // C++ [expr.dynamic.cast]p4:  diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 5db29eb6004d..fa38ee80bf41 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -4175,14 +4175,23 @@ static void emitPrivatesInit(CodeGenFunction &CGF,    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());    LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);    LValue SrcBase; -  if (!Data.FirstprivateVars.empty()) { +  bool IsTargetTask = +      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || +      isOpenMPTargetExecutionDirective(D.getDirectiveKind()); +  // For target-based directives skip 3 firstprivate arrays BasePointersArray, +  // PointersArray and SizesArray. The original variables for these arrays are +  // not captured and we get their addresses explicitly. +  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) || +      (IsTargetTask && Data.FirstprivateVars.size() > 3)) {      SrcBase = CGF.MakeAddrLValue(          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(              KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),          SharedsTy);    } -  CodeGenFunction::CGCapturedStmtInfo CapturesInfo( -      cast<CapturedStmt>(*D.getAssociatedStmt())); +  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) +                                 ? OMPD_taskloop +                                 : OMPD_task; +  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(*D.getCapturedStmt(Kind));    FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();    for (auto &&Pair : Privates) {      auto *VD = Pair.second.PrivateCopy; @@ -4192,14 +4201,27 @@ static void emitPrivatesInit(CodeGenFunction &CGF,        LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);        if (auto *Elem = Pair.second.PrivateElemInit) {          auto *OriginalVD = Pair.second.Original; -        auto *SharedField = CapturesInfo.lookup(OriginalVD); -        auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); -        SharedRefLValue = CGF.MakeAddrLValue( -            Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), -            SharedRefLValue.getType(), -            LValueBaseInfo(AlignmentSource::Decl), -            SharedRefLValue.getTBAAInfo()); +        // Check if the variable is the target-based BasePointersArray, +        // PointersArray or SizesArray. +        LValue SharedRefLValue;          QualType Type = OriginalVD->getType(); +        if (IsTargetTask && isa<ImplicitParamDecl>(OriginalVD) && +            isa<CapturedDecl>(OriginalVD->getDeclContext()) && +            cast<CapturedDecl>(OriginalVD->getDeclContext())->getNumParams() == +                0 && +            isa<TranslationUnitDecl>( +                cast<CapturedDecl>(OriginalVD->getDeclContext()) +                    ->getDeclContext())) { +          SharedRefLValue = +              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); +        } else { +          auto *SharedField = CapturesInfo.lookup(OriginalVD); +          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); +          SharedRefLValue = CGF.MakeAddrLValue( +              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), +              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), +              SharedRefLValue.getTBAAInfo()); +        }          if (Type->isArrayType()) {            // Initialize firstprivate array.            if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { @@ -4400,8 +4422,10 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,      }      KmpTaskTQTy = SavedKmpTaskloopTQTy;    } else { -    assert(D.getDirectiveKind() == OMPD_task && -           "Expected taskloop or task directive"); +    assert((D.getDirectiveKind() == OMPD_task || +            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || +            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && +           "Expected taskloop, task or target directive");      if (SavedKmpTaskTQTy.isNull()) {        SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(            CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); @@ -7417,8 +7441,8 @@ void CGOpenMPRuntime::emitTargetDataCalls(    // Generate the code for the opening of the data environment. Capture all the    // arguments of the runtime call by reference because they are used in the    // closing of the region. -  auto &&BeginThenGen = [&D, Device, &Info, &CodeGen](CodeGenFunction &CGF, -                                                      PrePostActionTy &) { +  auto &&BeginThenGen = [this, &D, Device, &Info, +                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {      // Fill up the arrays with all the mapped variables.      MappableExprsHandler::MapBaseValuesArrayTy BasePointers;      MappableExprsHandler::MapValuesArrayTy Pointers; @@ -7454,8 +7478,7 @@ void CGOpenMPRuntime::emitTargetDataCalls(      llvm::Value *OffloadingArgs[] = {          DeviceID,         PointerNum,    BasePointersArrayArg,          PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; -    auto &RT = CGF.CGM.getOpenMPRuntime(); -    CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin), +    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),                          OffloadingArgs);      // If device pointer privatization is required, emit the body of the region @@ -7465,7 +7488,8 @@ void CGOpenMPRuntime::emitTargetDataCalls(    };    // Generate code for the closing of the data region. -  auto &&EndThenGen = [Device, &Info](CodeGenFunction &CGF, PrePostActionTy &) { +  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, +                                            PrePostActionTy &) {      assert(Info.isValid() && "Invalid data environment closing arguments.");      llvm::Value *BasePointersArrayArg = nullptr; @@ -7490,8 +7514,7 @@ void CGOpenMPRuntime::emitTargetDataCalls(      llvm::Value *OffloadingArgs[] = {          DeviceID,         PointerNum,    BasePointersArrayArg,          PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; -    auto &RT = CGF.CGM.getOpenMPRuntime(); -    CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end), +    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),                          OffloadingArgs);    }; @@ -7543,25 +7566,11 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(            isa<OMPTargetUpdateDirective>(D)) &&           "Expecting either target enter, exit data, or update directives."); +  CodeGenFunction::OMPTargetDataInfo InputInfo; +  llvm::Value *MapTypesArray = nullptr;    // Generate the code for the opening of the data environment. -  auto &&ThenGen = [&D, Device](CodeGenFunction &CGF, PrePostActionTy &) { -    // Fill up the arrays with all the mapped variables. -    MappableExprsHandler::MapBaseValuesArrayTy BasePointers; -    MappableExprsHandler::MapValuesArrayTy Pointers; -    MappableExprsHandler::MapValuesArrayTy Sizes; -    MappableExprsHandler::MapFlagsArrayTy MapTypes; - -    // Get map clause information. -    MappableExprsHandler MEHandler(D, CGF); -    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); - -    // Fill up the arrays and create the arguments. -    TargetDataInfo Info; -    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); -    emitOffloadingArraysArgument(CGF, Info.BasePointersArray, -                                 Info.PointersArray, Info.SizesArray, -                                 Info.MapTypesArray, Info); - +  auto &&ThenGen = [this, &D, Device, &InputInfo, +                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {      // Emit device ID if any.      llvm::Value *DeviceID = nullptr;      if (Device) { @@ -7572,13 +7581,16 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(      }      // Emit the number of elements in the offloading arrays. -    auto *PointerNum = CGF.Builder.getInt32(BasePointers.size()); +    llvm::Constant *PointerNum = +        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); -    llvm::Value *OffloadingArgs[] = { -        DeviceID,           PointerNum,      Info.BasePointersArray, -        Info.PointersArray, Info.SizesArray, Info.MapTypesArray}; +    llvm::Value *OffloadingArgs[] = {DeviceID, +                                     PointerNum, +                                     InputInfo.BasePointersArray.getPointer(), +                                     InputInfo.PointersArray.getPointer(), +                                     InputInfo.SizesArray.getPointer(), +                                     MapTypesArray}; -    auto &RT = CGF.CGM.getOpenMPRuntime();      // Select the right runtime function call for each expected standalone      // directive.      const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); @@ -7600,18 +7612,47 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(                          : OMPRTL__tgt_target_data_update;        break;      } -    CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs); +    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);    }; -  // In the event we get an if clause, we don't have to take any action on the -  // else side. -  auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; +  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( +                             CodeGenFunction &CGF, PrePostActionTy &) { +    // Fill up the arrays with all the mapped variables. +    MappableExprsHandler::MapBaseValuesArrayTy BasePointers; +    MappableExprsHandler::MapValuesArrayTy Pointers; +    MappableExprsHandler::MapValuesArrayTy Sizes; +    MappableExprsHandler::MapFlagsArrayTy MapTypes; -  if (IfCond) { -    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); -  } else { -    RegionCodeGenTy ThenGenRCG(ThenGen); -    ThenGenRCG(CGF); +    // Get map clause information. +    MappableExprsHandler MEHandler(D, CGF); +    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); + +    TargetDataInfo Info; +    // Fill up the arrays and create the arguments. +    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); +    emitOffloadingArraysArgument(CGF, Info.BasePointersArray, +                                 Info.PointersArray, Info.SizesArray, +                                 Info.MapTypesArray, Info); +    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; +    InputInfo.BasePointersArray = +        Address(Info.BasePointersArray, CGM.getPointerAlign()); +    InputInfo.PointersArray = +        Address(Info.PointersArray, CGM.getPointerAlign()); +    InputInfo.SizesArray = +        Address(Info.SizesArray, CGM.getPointerAlign()); +    MapTypesArray = Info.MapTypesArray; +    if (D.hasClausesOfKind<OMPDependClause>()) +      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); +    else +      emitInlinedDirective(CGF, OMPD_target_update, ThenGen); +  }; + +  if (IfCond) +    emitOMPIfClause(CGF, IfCond, TargetThenGen, +                    [](CodeGenFunction &CGF, PrePostActionTy &) {}); +  else { +    RegionCodeGenTy ThenRCG(TargetThenGen); +    ThenRCG(CGF);    }  } diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index b5fc8d308067..7b2993cfd38d 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -33,10 +33,11 @@ enum OpenMPRTLFunctionNVPTX {    /// \brief Call to void __kmpc_spmd_kernel_deinit();    OMPRTL_NVPTX__kmpc_spmd_kernel_deinit,    /// \brief Call to void __kmpc_kernel_prepare_parallel(void -  /// *outlined_function, void ***args, kmp_int32 nArgs); +  /// *outlined_function, void ***args, kmp_int32 nArgs, int16_t +  /// IsOMPRuntimeInitialized);    OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,    /// \brief Call to bool __kmpc_kernel_parallel(void **outlined_function, void -  /// ***args); +  /// ***args, int16_t IsOMPRuntimeInitialized);    OMPRTL_NVPTX__kmpc_kernel_parallel,    /// \brief Call to void __kmpc_kernel_end_parallel();    OMPRTL_NVPTX__kmpc_kernel_end_parallel, @@ -521,7 +522,9 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,    // Set up shared arguments    Address SharedArgs =        CGF.CreateDefaultAlignTempAlloca(CGF.Int8PtrPtrTy, "shared_args"); -  llvm::Value *Args[] = {WorkFn.getPointer(), SharedArgs.getPointer()}; +  // TODO: Optimize runtime initialization and pass in correct value. +  llvm::Value *Args[] = {WorkFn.getPointer(), SharedArgs.getPointer(), +                         /*RequiresOMPRuntime=*/Bld.getInt16(1)};    llvm::Value *Ret = CGF.EmitRuntimeCall(        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args);    Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus); @@ -637,18 +640,21 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {    }    case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {      /// Build void __kmpc_kernel_prepare_parallel( -    /// void *outlined_function, void ***args, kmp_int32 nArgs); +    /// void *outlined_function, void ***args, kmp_int32 nArgs, int16_t +    /// IsOMPRuntimeInitialized);      llvm::Type *TypeParams[] = {CGM.Int8PtrTy, -        CGM.Int8PtrPtrTy->getPointerTo(0), CGM.Int32Ty}; +                                CGM.Int8PtrPtrTy->getPointerTo(0), CGM.Int32Ty, +                                CGM.Int16Ty};      llvm::FunctionType *FnTy =          llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);      RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel");      break;    }    case OMPRTL_NVPTX__kmpc_kernel_parallel: { -    /// Build bool __kmpc_kernel_parallel(void **outlined_function, void ***args); +    /// Build bool __kmpc_kernel_parallel(void **outlined_function, void +    /// ***args, int16_t IsOMPRuntimeInitialized);      llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy, -        CGM.Int8PtrPtrTy->getPointerTo(0)}; +                                CGM.Int8PtrPtrTy->getPointerTo(0), CGM.Int16Ty};      llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy);      llvm::FunctionType *FnTy =          llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false); @@ -949,8 +955,10 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(            CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy,                "shared_args");        llvm::Value *SharedArgsPtr = SharedArgs.getPointer(); +      // TODO: Optimize runtime initialization and pass in correct value.        llvm::Value *Args[] = {ID, SharedArgsPtr, -                             Bld.getInt32(CapturedVars.size())}; +                             Bld.getInt32(CapturedVars.size()), +                             /*RequiresOMPRuntime=*/Bld.getInt16(1)};        CGF.EmitRuntimeCall(            createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel), @@ -970,9 +978,10 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(          Idx++;        }      } else { -      llvm::Value *Args[] = {ID, -          llvm::ConstantPointerNull::get(CGF.VoidPtrPtrTy->getPointerTo(0)), -          /*nArgs=*/Bld.getInt32(0)}; +      // TODO: Optimize runtime initialization and pass in correct value. +      llvm::Value *Args[] = { +          ID, llvm::ConstantPointerNull::get(CGF.VoidPtrPtrTy->getPointerTo(0)), +          /*nArgs=*/Bld.getInt32(0), /*RequiresOMPRuntime=*/Bld.getInt16(1)};        CGF.EmitRuntimeCall(            createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),            Args); diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index f04d28ed0d4a..f9861735832b 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -2907,6 +2907,151 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,    TaskGen(*this, OutlinedFn, Data);  } +static ImplicitParamDecl * +createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, +                                  QualType Ty, CapturedDecl *CD) { +  auto *OrigVD = ImplicitParamDecl::Create( +      C, CD, SourceLocation(), /*Id=*/nullptr, Ty, ImplicitParamDecl::Other); +  auto *OrigRef = +      DeclRefExpr::Create(C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD, +                          /*RefersToEnclosingVariableOrCapture=*/false, +                          SourceLocation(), Ty, VK_LValue); +  auto *PrivateVD = ImplicitParamDecl::Create( +      C, CD, SourceLocation(), /*Id=*/nullptr, Ty, ImplicitParamDecl::Other); +  auto *PrivateRef = DeclRefExpr::Create( +      C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD, +      /*RefersToEnclosingVariableOrCapture=*/false, SourceLocation(), Ty, +      VK_LValue); +  QualType ElemType = C.getBaseElementType(Ty); +  auto *InitVD = +      ImplicitParamDecl::Create(C, CD, SourceLocation(), /*Id=*/nullptr, +                                ElemType, ImplicitParamDecl::Other); +  auto *InitRef = +      DeclRefExpr::Create(C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, +                          /*RefersToEnclosingVariableOrCapture=*/false, +                          SourceLocation(), ElemType, VK_LValue); +  PrivateVD->setInitStyle(VarDecl::CInit); +  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, +                                              InitRef, /*BasePath=*/nullptr, +                                              VK_RValue)); +  Data.FirstprivateVars.emplace_back(OrigRef); +  Data.FirstprivateCopies.emplace_back(PrivateRef); +  Data.FirstprivateInits.emplace_back(InitRef); +  return OrigVD; +} + +void CodeGenFunction::EmitOMPTargetTaskBasedDirective( +    const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, +    OMPTargetDataInfo &InputInfo) { +  // Emit outlined function for task construct. +  auto CS = S.getCapturedStmt(OMPD_task); +  auto CapturedStruct = GenerateCapturedStmtArgument(*CS); +  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); +  auto *I = CS->getCapturedDecl()->param_begin(); +  auto *PartId = std::next(I); +  auto *TaskT = std::next(I, 4); +  OMPTaskDataTy Data; +  // The task is not final. +  Data.Final.setInt(/*IntVal=*/false); +  // Get list of firstprivate variables. +  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { +    auto IRef = C->varlist_begin(); +    auto IElemInitRef = C->inits().begin(); +    for (auto *IInit : C->private_copies()) { +      Data.FirstprivateVars.push_back(*IRef); +      Data.FirstprivateCopies.push_back(IInit); +      Data.FirstprivateInits.push_back(*IElemInitRef); +      ++IRef; +      ++IElemInitRef; +    } +  } +  OMPPrivateScope TargetScope(*this); +  VarDecl *BPVD = nullptr; +  VarDecl *PVD = nullptr; +  VarDecl *SVD = nullptr; +  if (InputInfo.NumberOfTargetItems > 0) { +    auto *CD = CapturedDecl::Create( +        getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); +    llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); +    QualType BaseAndPointersType = getContext().getConstantArrayType( +        getContext().VoidPtrTy, ArrSize, ArrayType::Normal, +        /*IndexTypeQuals=*/0); +    BPVD = createImplicitFirstprivateForType(getContext(), Data, +                                             BaseAndPointersType, CD); +    PVD = createImplicitFirstprivateForType(getContext(), Data, +                                            BaseAndPointersType, CD); +    QualType SizesType = getContext().getConstantArrayType( +        getContext().getSizeType(), ArrSize, ArrayType::Normal, +        /*IndexTypeQuals=*/0); +    SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD); +    TargetScope.addPrivate( +        BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; }); +    TargetScope.addPrivate(PVD, +                           [&InputInfo]() { return InputInfo.PointersArray; }); +    TargetScope.addPrivate(SVD, +                           [&InputInfo]() { return InputInfo.SizesArray; }); +  } +  (void)TargetScope.Privatize(); +  // Build list of dependences. +  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) +    for (auto *IRef : C->varlists()) +      Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); +  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, +                    &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { +    // Set proper addresses for generated private copies. +    OMPPrivateScope Scope(CGF); +    if (!Data.FirstprivateVars.empty()) { +      enum { PrivatesParam = 2, CopyFnParam = 3 }; +      auto *CopyFn = CGF.Builder.CreateLoad( +          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); +      auto *PrivatesPtr = CGF.Builder.CreateLoad( +          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); +      // Map privates. +      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; +      llvm::SmallVector<llvm::Value *, 16> CallArgs; +      CallArgs.push_back(PrivatesPtr); +      for (auto *E : Data.FirstprivateVars) { +        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); +        Address PrivatePtr = +            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), +                              ".firstpriv.ptr.addr"); +        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); +        CallArgs.push_back(PrivatePtr.getPointer()); +      } +      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), +                                                          CopyFn, CallArgs); +      for (auto &&Pair : PrivatePtrs) { +        Address Replacement(CGF.Builder.CreateLoad(Pair.second), +                            CGF.getContext().getDeclAlign(Pair.first)); +        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); +      } +    } +    // Privatize all private variables except for in_reduction items. +    (void)Scope.Privatize(); +    InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( +        CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize()); +    InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( +        CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize()); +    InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( +        CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize()); + +    Action.Enter(CGF); +    OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true, +                             /*EmitPreInitStmt=*/false); +    BodyGen(CGF); +  }; +  auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( +      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, +      Data.NumberOfParts); +  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0); +  IntegerLiteral IfCond(getContext(), TrueOrFalse, +                        getContext().getIntTypeForBitwidth(32, /*Signed=*/0), +                        SourceLocation()); + +  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), S, OutlinedFn, +                                      SharedsTy, CapturedStruct, &IfCond, Data); +} +  void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {    // Emit outlined function for task construct.    auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); @@ -4252,14 +4397,8 @@ void CodeGenFunction::EmitOMPTargetEnterDataDirective(    if (auto *C = S.getSingleClause<OMPDeviceClause>())      Device = C->getDevice(); -  auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF, -                                        PrePostActionTy &) { -    CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond, -                                                            Device); -  };    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); -  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_enter_data, -                                              CodeGen); +  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);  }  void CodeGenFunction::EmitOMPTargetExitDataDirective( @@ -4279,14 +4418,8 @@ void CodeGenFunction::EmitOMPTargetExitDataDirective(    if (auto *C = S.getSingleClause<OMPDeviceClause>())      Device = C->getDevice(); -  auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF, -                                        PrePostActionTy &) { -    CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond, -                                                            Device); -  };    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); -  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_exit_data, -                                              CodeGen); +  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);  }  static void emitTargetParallelRegion(CodeGenFunction &CGF, @@ -4585,12 +4718,6 @@ void CodeGenFunction::EmitOMPTargetUpdateDirective(    if (auto *C = S.getSingleClause<OMPDeviceClause>())      Device = C->getDevice(); -  auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF, -                                        PrePostActionTy &) { -    CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond, -                                                            Device); -  };    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); -  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_update, -                                              CodeGen); +  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);  } diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index cd62d00dfb53..dd4c2e43ef64 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -2371,7 +2371,10 @@ public:      /// object within its lifetime.      TCK_UpcastToVirtualBase,      /// Checking the value assigned to a _Nonnull pointer. Must not be null. -    TCK_NonnullAssign +    TCK_NonnullAssign, +    /// Checking the operand of a dynamic_cast or a typeid expression.  Must be +    /// null or an object within its lifetime. +    TCK_DynamicOperation    };    /// Determine whether the pointer type check \p TCK permits null pointers. @@ -2820,6 +2823,20 @@ public:    void EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,                                   const RegionCodeGenTy &BodyGen,                                   const TaskGenTy &TaskGen, OMPTaskDataTy &Data); +  struct OMPTargetDataInfo { +    Address BasePointersArray = Address::invalid(); +    Address PointersArray = Address::invalid(); +    Address SizesArray = Address::invalid(); +    unsigned NumberOfTargetItems = 0; +    explicit OMPTargetDataInfo() = default; +    OMPTargetDataInfo(Address BasePointersArray, Address PointersArray, +                      Address SizesArray, unsigned NumberOfTargetItems) +        : BasePointersArray(BasePointersArray), PointersArray(PointersArray), +          SizesArray(SizesArray), NumberOfTargetItems(NumberOfTargetItems) {} +  }; +  void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, +                                       const RegionCodeGenTy &BodyGen, +                                       OMPTargetDataInfo &InputInfo);    void EmitOMPParallelDirective(const OMPParallelDirective &S);    void EmitOMPSimdDirective(const OMPSimdDirective &S); diff --git a/lib/Driver/SanitizerArgs.cpp b/lib/Driver/SanitizerArgs.cpp index 6ba8892f3501..3c985a1f71d7 100644 --- a/lib/Driver/SanitizerArgs.cpp +++ b/lib/Driver/SanitizerArgs.cpp @@ -794,7 +794,7 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,    if (MsanTrackOrigins)      CmdArgs.push_back(Args.MakeArgString("-fsanitize-memory-track-origins=" + -                                         llvm::utostr(MsanTrackOrigins))); +                                         Twine(MsanTrackOrigins)));    if (MsanUseAfterDtor)      CmdArgs.push_back("-fsanitize-memory-use-after-dtor"); @@ -829,7 +829,7 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,    if (AsanFieldPadding)      CmdArgs.push_back(Args.MakeArgString("-fsanitize-address-field-padding=" + -                                         llvm::utostr(AsanFieldPadding))); +                                         Twine(AsanFieldPadding)));    if (AsanUseAfterScope)      CmdArgs.push_back("-fsanitize-address-use-after-scope"); diff --git a/lib/Driver/ToolChains/Clang.cpp b/lib/Driver/ToolChains/Clang.cpp index 7b3f4bc9d872..8b895c4514c4 100644 --- a/lib/Driver/ToolChains/Clang.cpp +++ b/lib/Driver/ToolChains/Clang.cpp @@ -1738,10 +1738,9 @@ void Clang::AddHexagonTargetArgs(const ArgList &Args,    CmdArgs.push_back("-Wreturn-type");    if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { -    std::string N = llvm::utostr(G.getValue()); -    std::string Opt = std::string("-hexagon-small-data-threshold=") + N;      CmdArgs.push_back("-mllvm"); -    CmdArgs.push_back(Args.MakeArgString(Opt)); +    CmdArgs.push_back(Args.MakeArgString("-hexagon-small-data-threshold=" + +                                         Twine(G.getValue())));    }    if (!Args.hasArg(options::OPT_fno_short_enums)) diff --git a/lib/Driver/ToolChains/CommonArgs.cpp b/lib/Driver/ToolChains/CommonArgs.cpp index 60f96d03c9c8..f26880123d8c 100644 --- a/lib/Driver/ToolChains/CommonArgs.cpp +++ b/lib/Driver/ToolChains/CommonArgs.cpp @@ -419,8 +419,8 @@ void tools::AddGoldPlugin(const ToolChain &ToolChain, const ArgList &Args,      CmdArgs.push_back("-plugin-opt=thinlto");    if (unsigned Parallelism = getLTOParallelism(Args, D)) -    CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=jobs=") + -                                         llvm::to_string(Parallelism))); +    CmdArgs.push_back( +        Args.MakeArgString("-plugin-opt=jobs=" + Twine(Parallelism)));    // If an explicit debugger tuning argument appeared, pass it along.    if (Arg *A = Args.getLastArg(options::OPT_gTune_Group, diff --git a/lib/Driver/ToolChains/Darwin.cpp b/lib/Driver/ToolChains/Darwin.cpp index 289f4ed92f6c..2250e82d9dbf 100644 --- a/lib/Driver/ToolChains/Darwin.cpp +++ b/lib/Driver/ToolChains/Darwin.cpp @@ -545,8 +545,7 @@ void darwin::Linker::ConstructJob(Compilation &C, const JobAction &JA,    if (unsigned Parallelism =            getLTOParallelism(Args, getToolChain().getDriver())) {      CmdArgs.push_back("-mllvm"); -    CmdArgs.push_back( -        Args.MakeArgString(Twine("-threads=") + llvm::to_string(Parallelism))); +    CmdArgs.push_back(Args.MakeArgString("-threads=" + Twine(Parallelism)));    }    if (getToolChain().ShouldLinkCXXStdlib(Args)) diff --git a/lib/Driver/ToolChains/Hexagon.cpp b/lib/Driver/ToolChains/Hexagon.cpp index f21af5b4dcf5..2debf0e2de54 100644 --- a/lib/Driver/ToolChains/Hexagon.cpp +++ b/lib/Driver/ToolChains/Hexagon.cpp @@ -138,16 +138,15 @@ void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA,    const Driver &D = HTC.getDriver();    ArgStringList CmdArgs; -  std::string MArchString = "-march=hexagon"; -  CmdArgs.push_back(Args.MakeArgString(MArchString)); +  CmdArgs.push_back("-march=hexagon");    RenderExtraToolArgs(JA, CmdArgs); -  std::string AsName = "hexagon-llvm-mc"; -  std::string MCpuString = "-mcpu=hexagon" + -        toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str(); +  const char *AsName = "hexagon-llvm-mc";    CmdArgs.push_back("-filetype=obj"); -  CmdArgs.push_back(Args.MakeArgString(MCpuString)); +  CmdArgs.push_back(Args.MakeArgString( +      "-mcpu=hexagon" + +      toolchains::HexagonToolChain::GetTargetCPUVersion(Args)));    if (Output.isFilename()) {      CmdArgs.push_back("-o"); @@ -158,8 +157,7 @@ void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA,    }    if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { -    std::string N = llvm::utostr(G.getValue()); -    CmdArgs.push_back(Args.MakeArgString(std::string("-gpsize=") + N)); +    CmdArgs.push_back(Args.MakeArgString("-gpsize=" + Twine(G.getValue())));    }    Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); @@ -192,7 +190,7 @@ void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA,        II.getInputArg().render(Args, CmdArgs);    } -  auto *Exec = Args.MakeArgString(HTC.GetProgramPath(AsName.c_str())); +  auto *Exec = Args.MakeArgString(HTC.GetProgramPath(AsName));    C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));  } @@ -243,10 +241,8 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA,      CmdArgs.push_back(Opt.c_str());    CmdArgs.push_back("-march=hexagon"); -  std::string CpuVer = -        toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str(); -  std::string MCpuString = "-mcpu=hexagon" + CpuVer; -  CmdArgs.push_back(Args.MakeArgString(MCpuString)); +  StringRef CpuVer = toolchains::HexagonToolChain::GetTargetCPUVersion(Args); +  CmdArgs.push_back(Args.MakeArgString("-mcpu=hexagon" + CpuVer));    if (IsShared) {      CmdArgs.push_back("-shared"); @@ -261,8 +257,7 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA,      CmdArgs.push_back("-pie");    if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { -    std::string N = llvm::utostr(G.getValue()); -    CmdArgs.push_back(Args.MakeArgString(std::string("-G") + N)); +    CmdArgs.push_back(Args.MakeArgString("-G" + Twine(G.getValue())));      UseG0 = G.getValue() == 0;    } @@ -291,7 +286,7 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA,    //----------------------------------------------------------------------------    // Start Files    //---------------------------------------------------------------------------- -  const std::string MCpuSuffix = "/" + CpuVer; +  const std::string MCpuSuffix = "/" + CpuVer.str();    const std::string MCpuG0Suffix = MCpuSuffix + "/G0";    const std::string RootDir =        HTC.getHexagonTargetDir(D.InstalledDir, D.PrefixDirs) + "/"; @@ -351,7 +346,7 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA,      CmdArgs.push_back("--start-group");      if (!IsShared) { -      for (const std::string &Lib : OsLibs) +      for (StringRef Lib : OsLibs)          CmdArgs.push_back(Args.MakeArgString("-l" + Lib));        CmdArgs.push_back("-lc");      } diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt index 708cfaf429c9..97ba3edea1c5 100644 --- a/lib/Headers/CMakeLists.txt +++ b/lib/Headers/CMakeLists.txt @@ -7,6 +7,8 @@ set(files    arm64intr.h    avx2intrin.h    avx512bwintrin.h +  avx512bitalgintrin.h +  avx512vlbitalgintrin.h    avx512cdintrin.h    avx512vpopcntdqintrin.h    avx512dqintrin.h @@ -17,11 +19,15 @@ set(files    avx512pfintrin.h    avx512vbmiintrin.h    avx512vbmivlintrin.h +  avx512vbmi2intrin.h +  avx512vlvbmi2intrin.h    avx512vlbwintrin.h    avx512vlcdintrin.h    avx512vldqintrin.h    avx512vlintrin.h    avx512vpopcntdqvlintrin.h +  avx512vnniintrin.h +  avx512vlvnniintrin.h    avxintrin.h    bmi2intrin.h    bmiintrin.h @@ -42,6 +48,7 @@ set(files    fma4intrin.h    fmaintrin.h    fxsrintrin.h +  gfniintrin.h    htmintrin.h    htmxlintrin.h    ia32intrin.h @@ -82,8 +89,10 @@ set(files    tmmintrin.h    unwind.h    vadefs.h +  vaesintrin.h    varargs.h    vecintrin.h +  vpclmulqdqintrin.h    wmmintrin.h    __wmmintrin_aes.h    __wmmintrin_pclmul.h diff --git a/lib/Headers/avx512bitalgintrin.h b/lib/Headers/avx512bitalgintrin.h new file mode 100644 index 000000000000..2dd1471d2f7e --- /dev/null +++ b/lib/Headers/avx512bitalgintrin.h @@ -0,0 +1,97 @@ +/*===------------- avx512bitalgintrin.h - BITALG intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512bitalgintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512BITALGINTRIN_H +#define __AVX512BITALGINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_popcnt_epi16(__m512i __A) +{ +  return (__m512i) __builtin_ia32_vpopcntw_512((__v32hi) __A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) +{ +  return (__m512i) __builtin_ia32_selectw_512((__mmask32) __U, +              (__v32hi) _mm512_popcnt_epi16(__B), +              (__v32hi) __A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) +{ +  return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_hi(), +              __U, +              __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_popcnt_epi8(__m512i __A) +{ +  return (__m512i) __builtin_ia32_vpopcntb_512((__v64qi) __A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) +{ +  return (__m512i) __builtin_ia32_selectb_512((__mmask64) __U, +              (__v64qi) _mm512_popcnt_epi8(__B), +              (__v64qi) __A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) +{ +  return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_qi(), +              __U, +              __B); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_mask_bitshuffle_epi64_mask(__mmask64 __U, __m512i __A, __m512i __B) +{ +  return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask((__v64qi) __A, +              (__v64qi) __B, +              __U); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) +{ +  return _mm512_mask_bitshuffle_epi64_mask((__mmask64) -1, +              __A, +              __B); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/lib/Headers/avx512vbmi2intrin.h b/lib/Headers/avx512vbmi2intrin.h new file mode 100644 index 000000000000..43e97b40a098 --- /dev/null +++ b/lib/Headers/avx512vbmi2intrin.h @@ -0,0 +1,391 @@ +/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512VBMI2INTRIN_H +#define __AVX512VBMI2INTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"))) + + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) +{ +  return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, +              (__v32hi) __S, +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D) +{ +  return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, +              (__v32hi) _mm512_setzero_hi(), +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D) +{ +  return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, +              (__v64qi) __S, +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) +{ +  return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, +              (__v64qi) _mm512_setzero_qi(), +              __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D) +{ +  __builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D, +              __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D) +{ +  __builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D, +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D) +{ +  return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, +              (__v32hi) __S, +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D) +{ +  return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, +              (__v32hi) _mm512_setzero_hi(), +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D) +{ +  return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, +              (__v64qi) __S, +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D) +{ +  return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, +              (__v64qi) _mm512_setzero_qi(), +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P) +{ +  return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, +              (__v32hi) __S, +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P) +{ +  return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, +              (__v32hi) _mm512_setzero_hi(), +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P) +{ +  return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, +              (__v64qi) __S, +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) +{ +  return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, +              (__v64qi) _mm512_setzero_qi(), +              __U); +} + +#define _mm512_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \ +  (__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(A), \ +                                          (__v8di)(B), \ +                                          (int)(I), \ +                                          (__v8di)(S), \ +                                          (__mmask8)(U)); }) + +#define _mm512_maskz_shldi_epi64(U, A, B, I) \ +  _mm512_mask_shldi_epi64(_mm512_setzero_hi(), (U), (A), (B), (I)) + +#define _mm512_shldi_epi64(A, B, I) \ +  _mm512_mask_shldi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm512_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \ +  (__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(A), \ +                                          (__v16si)(B), \ +                                          (int)(I), \ +                                          (__v16si)(S), \ +                                          (__mmask16)(U)); }) + +#define _mm512_maskz_shldi_epi32(U, A, B, I) \ +  _mm512_mask_shldi_epi32(_mm512_setzero_hi(), (U), (A), (B), (I)) + +#define _mm512_shldi_epi32(A, B, I) \ +  _mm512_mask_shldi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I)) + +#define _mm512_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \ +  (__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(A), \ +                                          (__v32hi)(B), \ +                                          (int)(I), \ +                                          (__v32hi)(S), \ +                                          (__mmask32)(U)); }) + +#define _mm512_maskz_shldi_epi16(U, A, B, I) \ +  _mm512_mask_shldi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I)) + +#define _mm512_shldi_epi16(A, B, I) \ +  _mm512_mask_shldi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I)) + +#define _mm512_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \ +  (__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(A), \ +                                          (__v8di)(B), \ +                                          (int)(I), \ +                                          (__v8di)(S), \ +                                          (__mmask8)(U)); }) + +#define _mm512_maskz_shrdi_epi64(U, A, B, I) \ +  _mm512_mask_shrdi_epi64(_mm512_setzero_hi(), (U), (A), (B), (I)) + +#define _mm512_shrdi_epi64(A, B, I) \ +  _mm512_mask_shrdi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm512_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \ +  (__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(A), \ +                                          (__v16si)(B), \ +                                          (int)(I), \ +                                          (__v16si)(S), \ +                                          (__mmask16)(U)); }) + +#define _mm512_maskz_shrdi_epi32(U, A, B, I) \ +  _mm512_mask_shrdi_epi32(_mm512_setzero_hi(), (U), (A), (B), (I)) + +#define _mm512_shrdi_epi32(A, B, I) \ +  _mm512_mask_shrdi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I)) + +#define _mm512_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \ +  (__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(A), \ +                                          (__v32hi)(B), \ +                                          (int)(I), \ +                                          (__v32hi)(S), \ +                                          (__mmask32)(U)); }) + +#define _mm512_maskz_shrdi_epi16(U, A, B, I) \ +  _mm512_mask_shrdi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I)) + +#define _mm512_shrdi_epi16(A, B, I) \ +  _mm512_mask_shrdi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I)) + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S, +              (__v8di) __A, +              (__v8di) __B, +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpshldvq512_maskz ((__v8di) __S, +              (__v8di) __A, +              (__v8di) __B, +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S, +              (__v8di) __A, +              (__v8di) __B, +              (__mmask8) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S, +              (__v16si) __A, +              (__v16si) __B, +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpshldvd512_maskz ((__v16si) __S, +              (__v16si) __A, +              (__v16si) __B, +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S, +              (__v16si) __A, +              (__v16si) __B, +              (__mmask16) -1); +} + + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S, +              (__v32hi) __A, +              (__v32hi) __B, +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpshldvw512_maskz ((__v32hi) __S, +              (__v32hi) __A, +              (__v32hi) __B, +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S, +              (__v32hi) __A, +              (__v32hi) __B, +              (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S, +              (__v8di) __A, +              (__v8di) __B, +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpshrdvq512_maskz ((__v8di) __S, +              (__v8di) __A, +              (__v8di) __B, +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S, +              (__v8di) __A, +              (__v8di) __B, +              (__mmask8) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S, +              (__v16si) __A, +              (__v16si) __B, +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpshrdvd512_maskz ((__v16si) __S, +              (__v16si) __A, +              (__v16si) __B, +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S, +              (__v16si) __A, +              (__v16si) __B, +              (__mmask16) -1); +} + + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S, +              (__v32hi) __A, +              (__v32hi) __B, +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpshrdvw512_maskz ((__v32hi) __S, +              (__v32hi) __A, +              (__v32hi) __B, +              __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S, +              (__v32hi) __A, +              (__v32hi) __B, +              (__mmask32) -1); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif + diff --git a/lib/Headers/avx512vlbitalgintrin.h b/lib/Headers/avx512vlbitalgintrin.h new file mode 100644 index 000000000000..76eb87721b8b --- /dev/null +++ b/lib/Headers/avx512vlbitalgintrin.h @@ -0,0 +1,157 @@ +/*===------------- avx512vlbitalgintrin.h - BITALG intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512vlbitalgintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512VLBITALGINTRIN_H +#define __AVX512VLBITALGINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"))) + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_popcnt_epi16(__m256i __A) +{ +  return (__m256i) __builtin_ia32_vpopcntw_256((__v16hi) __A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) +{ +  return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U, +              (__v16hi) _mm256_popcnt_epi16(__B), +              (__v16hi) __A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) +{ +  return _mm256_mask_popcnt_epi16((__m256i) _mm256_setzero_si256(), +              __U, +              __B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_popcnt_epi16(__m128i __A) +{ +  return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) +{ +  return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U, +              (__v8hi) _mm128_popcnt_epi16(__B), +              (__v8hi) __A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) +{ +  return _mm128_mask_popcnt_epi16((__m128i) _mm_setzero_si128(), +              __U, +              __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_popcnt_epi8(__m256i __A) +{ +  return (__m256i) __builtin_ia32_vpopcntb_256((__v32qi) __A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) +{ +  return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U, +              (__v32qi) _mm256_popcnt_epi8(__B), +              (__v32qi) __A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) +{ +  return _mm256_mask_popcnt_epi8((__m256i) _mm256_setzero_si256(), +              __U, +              __B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_popcnt_epi8(__m128i __A) +{ +  return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) +{ +  return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U, +              (__v16qi) _mm128_popcnt_epi8(__B), +              (__v16qi) __A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) +{ +  return _mm128_mask_popcnt_epi8((__m128i) _mm_setzero_si128(), +              __U, +              __B); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B) +{ +  return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask((__v32qi) __A, +              (__v32qi) __B, +              __U); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_bitshuffle_epi32_mask(__m256i __A, __m256i __B) +{ +  return _mm256_mask_bitshuffle_epi32_mask((__mmask32) -1, +              __A, +              __B); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B) +{ +  return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask((__v16qi) __A, +              (__v16qi) __B, +              __U); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm128_bitshuffle_epi16_mask(__m128i __A, __m128i __B) +{ +  return _mm128_mask_bitshuffle_epi16_mask((__mmask16) -1, +              __A, +              __B); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/lib/Headers/avx512vlvbmi2intrin.h b/lib/Headers/avx512vlvbmi2intrin.h new file mode 100644 index 000000000000..d1ec4976f274 --- /dev/null +++ b/lib/Headers/avx512vlvbmi2intrin.h @@ -0,0 +1,748 @@ +/*===------------- avx512vlvbmi2intrin.h - VBMI2 intrinsics -----------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512vlvbmi2intrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512VLVBMI2INTRIN_H +#define __AVX512VLVBMI2INTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"))) + +static  __inline __m128i __DEFAULT_FN_ATTRS +_mm128_setzero_hi(void) { +  return (__m128i)(__v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 }; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) +{ +  return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, +              (__v8hi) __S, +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_compress_epi16(__mmask8 __U, __m128i __D) +{ +  return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, +              (__v8hi) _mm128_setzero_hi(), +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) +{ +  return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, +              (__v16qi) __S, +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_compress_epi8(__mmask16 __U, __m128i __D) +{ +  return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, +              (__v16qi) _mm128_setzero_hi(), +              __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm128_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D) +{ +  __builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D, +              __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm128_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D) +{ +  __builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D, +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) +{ +  return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, +              (__v8hi) __S, +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_expand_epi16(__mmask8 __U, __m128i __D) +{ +  return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, +              (__v8hi) _mm128_setzero_hi(), +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) +{ +  return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, +              (__v16qi) __S, +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_expand_epi8(__mmask16 __U, __m128i __D) +{ +  return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, +              (__v16qi) _mm128_setzero_hi(), +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P) +{ +  return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, +              (__v8hi) __S, +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_expandloadu_epi16(__mmask8 __U, void const *__P) +{ +  return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, +              (__v8hi) _mm128_setzero_hi(), +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P) +{ +  return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, +              (__v16qi) __S, +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_expandloadu_epi8(__mmask16 __U, void const *__P) +{ +  return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, +              (__v16qi) _mm128_setzero_hi(), +              __U); +} + +static  __inline __m256i __DEFAULT_FN_ATTRS +_mm256_setzero_hi(void) { +  return (__m256i)(__v16hi){ 0, 0, 0, 0, 0, 0, 0, 0, +                             0, 0, 0, 0, 0, 0, 0, 0 }; +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D) +{ +  return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, +              (__v16hi) __S, +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D) +{ +  return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, +              (__v16hi) _mm256_setzero_hi(), +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D) +{ +  return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, +              (__v32qi) __S, +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D) +{ +  return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, +              (__v32qi) _mm256_setzero_hi(), +              __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D) +{ +  __builtin_ia32_compressstorehi256_mask ((__v16hi *) __P, (__v16hi) __D, +              __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D) +{ +  __builtin_ia32_compressstoreqi256_mask ((__v32qi *) __P, (__v32qi) __D, +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D) +{ +  return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, +              (__v16hi) __S, +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D) +{ +  return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, +              (__v16hi) _mm256_setzero_hi(), +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D) +{ +  return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, +              (__v32qi) __S, +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D) +{ +  return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, +              (__v32qi) _mm256_setzero_hi(), +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P) +{ +  return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, +              (__v16hi) __S, +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P) +{ +  return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, +              (__v16hi) _mm256_setzero_hi(), +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P) +{ +  return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, +              (__v32qi) __S, +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) +{ +  return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, +              (__v32qi) _mm256_setzero_hi(), +              __U); +} + +#define _mm256_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \ +  (__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(A), \ +                                          (__v4di)(B), \ +                                          (int)(I), \ +                                          (__v4di)(S), \ +                                          (__mmask8)(U)); }) + +#define _mm256_maskz_shldi_epi64(U, A, B, I) \ +  _mm256_mask_shldi_epi64(_mm256_setzero_hi(), (U), (A), (B), (I)) + +#define _mm256_shldi_epi64(A, B, I) \ +  _mm256_mask_shldi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm128_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \ +  (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(A), \ +                                          (__v2di)(B), \ +                                          (int)(I), \ +                                          (__v2di)(S), \ +                                          (__mmask8)(U)); }) + +#define _mm128_maskz_shldi_epi64(U, A, B, I) \ +  _mm128_mask_shldi_epi64(_mm128_setzero_hi(), (U), (A), (B), (I)) + +#define _mm128_shldi_epi64(A, B, I) \ +  _mm128_mask_shldi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm256_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \ +  (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(A), \ +                                          (__v8si)(B), \ +                                          (int)(I), \ +                                          (__v8si)(S), \ +                                          (__mmask8)(U)); }) + +#define _mm256_maskz_shldi_epi32(U, A, B, I) \ +  _mm256_mask_shldi_epi32(_mm256_setzero_hi(), (U), (A), (B), (I)) + +#define _mm256_shldi_epi32(A, B, I) \ +  _mm256_mask_shldi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm128_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \ +  (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(A), \ +                                          (__v4si)(B), \ +                                          (int)(I), \ +                                          (__v4si)(S), \ +                                          (__mmask8)(U)); }) + +#define _mm128_maskz_shldi_epi32(U, A, B, I) \ +  _mm128_mask_shldi_epi32(_mm128_setzero_hi(), (U), (A), (B), (I)) + +#define _mm128_shldi_epi32(A, B, I) \ +  _mm128_mask_shldi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm256_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \ +  (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(A), \ +                                          (__v16hi)(B), \ +                                          (int)(I), \ +                                          (__v16hi)(S), \ +                                          (__mmask16)(U)); }) + +#define _mm256_maskz_shldi_epi16(U, A, B, I) \ +  _mm256_mask_shldi_epi16(_mm256_setzero_hi(), (U), (A), (B), (I)) + +#define _mm256_shldi_epi16(A, B, I) \ +  _mm256_mask_shldi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm128_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \ +  (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(A), \ +                                          (__v8hi)(B), \ +                                          (int)(I), \ +                                          (__v8hi)(S), \ +                                          (__mmask8)(U)); }) + +#define _mm128_maskz_shldi_epi16(U, A, B, I) \ +  _mm128_mask_shldi_epi16(_mm128_setzero_hi(), (U), (A), (B), (I)) + +#define _mm128_shldi_epi16(A, B, I) \ +  _mm128_mask_shldi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm256_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \ +  (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(A), \ +                                          (__v4di)(B), \ +                                          (int)(I), \ +                                          (__v4di)(S), \ +                                          (__mmask8)(U)); }) + +#define _mm256_maskz_shrdi_epi64(U, A, B, I) \ +  _mm256_mask_shrdi_epi64(_mm256_setzero_hi(), (U), (A), (B), (I)) + +#define _mm256_shrdi_epi64(A, B, I) \ +  _mm256_mask_shrdi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm128_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \ +  (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(A), \ +                                          (__v2di)(B), \ +                                          (int)(I), \ +                                          (__v2di)(S), \ +                                          (__mmask8)(U)); }) + +#define _mm128_maskz_shrdi_epi64(U, A, B, I) \ +  _mm128_mask_shrdi_epi64(_mm128_setzero_hi(), (U), (A), (B), (I)) + +#define _mm128_shrdi_epi64(A, B, I) \ +  _mm128_mask_shrdi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm256_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \ +  (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(A), \ +                                          (__v8si)(B), \ +                                          (int)(I), \ +                                          (__v8si)(S), \ +                                          (__mmask8)(U)); }) + +#define _mm256_maskz_shrdi_epi32(U, A, B, I) \ +  _mm256_mask_shrdi_epi32(_mm256_setzero_hi(), (U), (A), (B), (I)) + +#define _mm256_shrdi_epi32(A, B, I) \ +  _mm256_mask_shrdi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm128_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \ +  (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(A), \ +                                          (__v4si)(B), \ +                                          (int)(I), \ +                                          (__v4si)(S), \ +                                          (__mmask8)(U)); }) + +#define _mm128_maskz_shrdi_epi32(U, A, B, I) \ +  _mm128_mask_shrdi_epi32(_mm128_setzero_hi(), (U), (A), (B), (I)) + +#define _mm128_shrdi_epi32(A, B, I) \ +  _mm128_mask_shrdi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm256_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \ +  (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(A), \ +                                          (__v16hi)(B), \ +                                          (int)(I), \ +                                          (__v16hi)(S), \ +                                          (__mmask16)(U)); }) + +#define _mm256_maskz_shrdi_epi16(U, A, B, I) \ +  _mm256_mask_shrdi_epi16(_mm256_setzero_hi(), (U), (A), (B), (I)) + +#define _mm256_shrdi_epi16(A, B, I) \ +  _mm256_mask_shrdi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm128_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \ +  (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(A), \ +                                          (__v8hi)(B), \ +                                          (int)(I), \ +                                          (__v8hi)(S), \ +                                          (__mmask8)(U)); }) + +#define _mm128_maskz_shrdi_epi16(U, A, B, I) \ +  _mm128_mask_shrdi_epi16(_mm128_setzero_hi(), (U), (A), (B), (I)) + +#define _mm128_shrdi_epi16(A, B, I) \ +  _mm128_mask_shrdi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S, +              (__v4di) __A, +              (__v4di) __B, +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpshldvq256_maskz ((__v4di) __S, +              (__v4di) __A, +              (__v4di) __B, +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S, +              (__v4di) __A, +              (__v4di) __B, +              (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S, +              (__v2di) __A, +              (__v2di) __B, +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpshldvq128_maskz ((__v2di) __S, +              (__v2di) __A, +              (__v2di) __B, +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_shldv_epi64(__m128i __S, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S, +              (__v2di) __A, +              (__v2di) __B, +              (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S, +              (__v8si) __A, +              (__v8si) __B, +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpshldvd256_maskz ((__v8si) __S, +              (__v8si) __A, +              (__v8si) __B, +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S, +              (__v8si) __A, +              (__v8si) __B, +              (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S, +              (__v4si) __A, +              (__v4si) __B, +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpshldvd128_maskz ((__v4si) __S, +              (__v4si) __A, +              (__v4si) __B, +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_shldv_epi32(__m128i __S, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S, +              (__v4si) __A, +              (__v4si) __B, +              (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S, +              (__v16hi) __A, +              (__v16hi) __B, +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpshldvw256_maskz ((__v16hi) __S, +              (__v16hi) __A, +              (__v16hi) __B, +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S, +              (__v16hi) __A, +              (__v16hi) __B, +              (__mmask16) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S, +              (__v8hi) __A, +              (__v8hi) __B, +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpshldvw128_maskz ((__v8hi) __S, +              (__v8hi) __A, +              (__v8hi) __B, +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_shldv_epi16(__m128i __S, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S, +              (__v8hi) __A, +              (__v8hi) __B, +              (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S, +              (__v4di) __A, +              (__v4di) __B, +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpshrdvq256_maskz ((__v4di) __S, +              (__v4di) __A, +              (__v4di) __B, +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S, +              (__v4di) __A, +              (__v4di) __B, +              (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S, +              (__v2di) __A, +              (__v2di) __B, +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpshrdvq128_maskz ((__v2di) __S, +              (__v2di) __A, +              (__v2di) __B, +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S, +              (__v2di) __A, +              (__v2di) __B, +              (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S, +              (__v8si) __A, +              (__v8si) __B, +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpshrdvd256_maskz ((__v8si) __S, +              (__v8si) __A, +              (__v8si) __B, +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S, +              (__v8si) __A, +              (__v8si) __B, +              (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S, +              (__v4si) __A, +              (__v4si) __B, +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpshrdvd128_maskz ((__v4si) __S, +              (__v4si) __A, +              (__v4si) __B, +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S, +              (__v4si) __A, +              (__v4si) __B, +              (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S, +              (__v16hi) __A, +              (__v16hi) __B, +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpshrdvw256_maskz ((__v16hi) __S, +              (__v16hi) __A, +              (__v16hi) __B, +              __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S, +              (__v16hi) __A, +              (__v16hi) __B, +              (__mmask16) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S, +              (__v8hi) __A, +              (__v8hi) __B, +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpshrdvw128_maskz ((__v8hi) __S, +              (__v8hi) __A, +              (__v8hi) __B, +              __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S, +              (__v8hi) __A, +              (__v8hi) __B, +              (__mmask8) -1); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/lib/Headers/avx512vlvnniintrin.h b/lib/Headers/avx512vlvnniintrin.h new file mode 100644 index 000000000000..745ae8b7ad3d --- /dev/null +++ b/lib/Headers/avx512vlvnniintrin.h @@ -0,0 +1,254 @@ +/*===------------- avx512vlvnniintrin.h - VNNI intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512vlvnniintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512VLVNNIINTRIN_H +#define __AVX512VLVNNIINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"))) + + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S, +              (__v8si) __A, +              (__v8si) __B, +              (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpdpbusd256_maskz ((__v8si) __S, +              (__v8si) __A, +              (__v8si) __B, +              (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S, +              (__v8si) __A, +              (__v8si) __B, +              (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S, +              (__v8si) __A, +              (__v8si) __B, +              (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpdpbusds256_maskz ((__v8si) __S, +              (__v8si) __A, +              (__v8si) __B, +              (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S, +              (__v8si) __A, +              (__v8si) __B, +              (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S, +              (__v8si) __A, +              (__v8si) __B, +              (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpdpwssd256_maskz ((__v8si) __S, +              (__v8si) __A, +              (__v8si) __B, +              (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S, +              (__v8si) __A, +              (__v8si) __B, +              (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S, +              (__v8si) __A, +              (__v8si) __B, +              (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpdpwssds256_maskz ((__v8si) __S, +              (__v8si) __A, +              (__v8si) __B, +              (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S, +              (__v8si) __A, +              (__v8si) __B, +              (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S, +              (__v4si) __A, +              (__v4si) __B, +              (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpdpbusd128_maskz ((__v4si) __S, +              (__v4si) __A, +              (__v4si) __B, +              (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S, +              (__v4si) __A, +              (__v4si) __B, +              (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S, +              (__v4si) __A, +              (__v4si) __B, +              (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpdpbusds128_maskz ((__v4si) __S, +              (__v4si) __A, +              (__v4si) __B, +              (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S, +              (__v4si) __A, +              (__v4si) __B, +              (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S, +              (__v4si) __A, +              (__v4si) __B, +              (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpdpwssd128_maskz ((__v4si) __S, +              (__v4si) __A, +              (__v4si) __B, +              (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S, +              (__v4si) __A, +              (__v4si) __B, +              (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S, +              (__v4si) __A, +              (__v4si) __B, +              (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpdpwssds128_maskz ((__v4si) __S, +              (__v4si) __A, +              (__v4si) __B, +              (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S, +              (__v4si) __A, +              (__v4si) __B, +              (__mmask8) -1); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/lib/Headers/avx512vnniintrin.h b/lib/Headers/avx512vnniintrin.h new file mode 100644 index 000000000000..0c6badd231aa --- /dev/null +++ b/lib/Headers/avx512vnniintrin.h @@ -0,0 +1,146 @@ +/*===------------- avx512vnniintrin.h - VNNI intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512vnniintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512VNNIINTRIN_H +#define __AVX512VNNIINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"))) + + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S, +              (__v16si) __A, +              (__v16si) __B, +              (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpdpbusd512_maskz ((__v16si) __S, +              (__v16si) __A, +              (__v16si) __B, +              (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S, +              (__v16si) __A, +              (__v16si) __B, +              (__mmask16) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S, +              (__v16si) __A, +              (__v16si) __B, +              (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpdpbusds512_maskz ((__v16si) __S, +              (__v16si) __A, +              (__v16si) __B, +              (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S, +              (__v16si) __A, +              (__v16si) __B, +              (__mmask16) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S, +              (__v16si) __A, +              (__v16si) __B, +              (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpdpwssd512_maskz ((__v16si) __S, +              (__v16si) __A, +              (__v16si) __B, +              (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S, +              (__v16si) __A, +              (__v16si) __B, +              (__mmask16) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S, +              (__v16si) __A, +              (__v16si) __B, +              (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpdpwssds512_maskz ((__v16si) __S, +              (__v16si) __A, +              (__v16si) __B, +              (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S, +              (__v16si) __A, +              (__v16si) __B, +              (__mmask16) -1); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/lib/Headers/gfniintrin.h b/lib/Headers/gfniintrin.h new file mode 100644 index 000000000000..20fadccfaaed --- /dev/null +++ b/lib/Headers/gfniintrin.h @@ -0,0 +1,202 @@ +/*===----------------- gfniintrin.h - GFNI intrinsics ----------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <gfniintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __GFNIINTRIN_H +#define __GFNIINTRIN_H + + +#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({                   \ +  (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A),          \ +                                                  (__v16qi)(__m128i)(B),          \ +                                                  (char)(I)); }) + +#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({        \ +  (__m128i)__builtin_ia32_selectb_128((__mmask16)(U),                             \ +        (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I),                          \ +        (__v16qi)(__m128i)(S)); }) + + +#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({          \ +  (__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(),       \ +        U, A, B, I); }) + + +#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({                \ +  (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A),          \ +                                                  (__v32qi)(__m256i)(B),          \ +                                                  (char)(I)); }) + +#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({     \ +   (__m256i)__builtin_ia32_selectb_256((__mmask32)(U),                            \ +        (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I),                       \ +        (__v32qi)(__m256i)(S)); }) + +#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({       \ +  (__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \ +        U, A, B, I); }) + + +#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({                \ +  (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A),          \ +                                                  (__v64qi)(__m512i)(B),          \ +                                                  (char)(I)); }) + +#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({     \ +   (__m512i)__builtin_ia32_selectb_512((__mmask64)(U),                            \ +        (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I),                       \ +        (__v64qi)(__m512i)(S)); }) + +#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({       \ +  (__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_qi(),    \ +        U, A, B, I); }) + +#define _mm_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({                      \ +  (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A),             \ +                                                  (__v16qi)(__m128i)(B),          \ +                                                  (char)(I)); }) + +#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({           \ +  (__m128i)__builtin_ia32_selectb_128((__mmask16)(U),                             \ +        (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I),                             \ +        (__v16qi)(__m128i)(S)); }) + + +#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({             \ +  (__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(),          \ +        U, A, B, I); }) + + +#define _mm256_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({                   \ +  (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A),             \ +                                                  (__v32qi)(__m256i)(B),          \ +                                                  (char)(I)); }) + +#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({        \ +   (__m256i)__builtin_ia32_selectb_256((__mmask32)(U),                            \ +        (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I),                          \ +        (__v32qi)(__m256i)(S)); }) + +#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({          \ +  (__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(),    \ +        U, A, B, I); }) + + +#define _mm512_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({                   \ +  (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A),             \ +                                                  (__v64qi)(__m512i)(B),          \ +                                                  (char)(I)); }) + +#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({        \ +   (__m512i)__builtin_ia32_selectb_512((__mmask64)(U),                            \ +        (__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I),                          \ +        (__v64qi)(__m512i)(S)); }) + +#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({          \ +  (__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_qi(),       \ +        U, A, B, I); }) + +/* Default attributes for simple form (no masking). */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"))) + +/* Default attributes for ZMM forms. */ +#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"))) + +/* Default attributes for VLX forms. */ +#define __DEFAULT_FN_ATTRS_VL __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"))) + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_gf2p8mul_epi8(__m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A, +              (__v16qi) __B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS_VL +_mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B) +{ +  return (__m128i) __builtin_ia32_selectb_128(__U, +              (__v16qi) _mm_gf2p8mul_epi8(__A, __B), +              (__v16qi) __S); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS_VL +_mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B) +{ +  return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(), +              __U, __A, __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A, +              (__v32qi) __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS_VL +_mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_selectb_256(__U, +              (__v32qi) _mm256_gf2p8mul_epi8(__A, __B), +              (__v32qi) __S); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS_VL +_mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B) +{ +  return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(), +              __U, __A, __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F +_mm512_gf2p8mul_epi8(__m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi((__v64qi) __A, +              (__v64qi) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F +_mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_selectb_512(__U, +              (__v64qi) _mm512_gf2p8mul_epi8(__A, __B), +              (__v64qi) __S); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F +_mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B) +{ +  return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_qi(), +              __U, __A, __B); +} + +#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_F +#undef __DEFAULT_FN_ATTRS_VL + +#endif // __GFNIINTRIN_H + diff --git a/lib/Headers/immintrin.h b/lib/Headers/immintrin.h index 64ad6e658422..d3421dc86c99 100644 --- a/lib/Headers/immintrin.h +++ b/lib/Headers/immintrin.h @@ -118,6 +118,10 @@ _mm256_cvtph_ps(__m128i __a)  }  #endif /* __AVX2__ */ +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__) +#include <vpclmulqdqintrin.h> +#endif +  #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)  #include <bmiintrin.h>  #endif @@ -146,6 +150,10 @@ _mm256_cvtph_ps(__m128i __a)  #include <avx512bwintrin.h>  #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__) +#include <avx512bitalgintrin.h> +#endif +  #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)  #include <avx512cdintrin.h>  #endif @@ -159,11 +167,25 @@ _mm256_cvtph_ps(__m128i __a)  #include <avx512vpopcntdqvlintrin.h>  #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__) +#include <avx512vnniintrin.h> +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || \ +    (defined(__AVX512VL__) && defined(__AVX512VNNI__)) +#include <avx512vlvnniintrin.h> +#endif +  #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)  #include <avx512dqintrin.h>  #endif  #if !defined(_MSC_VER) || __has_feature(modules) || \ +    (defined(__AVX512VL__) && defined(__AVX512BITALG__)) +#include <avx512vlbitalgintrin.h> +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || \      (defined(__AVX512VL__) && defined(__AVX512BW__))  #include <avx512vlbwintrin.h>  #endif @@ -200,6 +222,15 @@ _mm256_cvtph_ps(__m128i __a)  #include <avx512vbmivlintrin.h>  #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__) +#include <avx512vbmi2intrin.h> +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || \ +    (defined(__AVX512VBMI2__) && defined(__AVX512VL__)) +#include <avx512vlvbmi2intrin.h> +#endif +  #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)  #include <avx512pfintrin.h>  #endif @@ -208,6 +239,14 @@ _mm256_cvtph_ps(__m128i __a)  #include <pkuintrin.h>  #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__) +#include <vaesintrin.h> +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__) +#include <gfniintrin.h> +#endif +  #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)  static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))  _rdrand16_step(unsigned short *__p) diff --git a/lib/Headers/vaesintrin.h b/lib/Headers/vaesintrin.h new file mode 100644 index 000000000000..efbb8a565292 --- /dev/null +++ b/lib/Headers/vaesintrin.h @@ -0,0 +1,98 @@ +/*===------------------ vaesintrin.h - VAES intrinsics ---------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <vaesintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __VAESINTRIN_H +#define __VAESINTRIN_H + +/* Default attributes for YMM forms. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes"))) + +/* Default attributes for ZMM forms. */ +#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes"))) + + +static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_aesenc_epi128(__m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_aesenc256((__v4di) __A, +              (__v4di) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F + _mm512_aesenc_epi128(__m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_aesenc512((__v8di) __A, +              (__v8di) __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_aesdec_epi128(__m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_aesdec256((__v4di) __A, +              (__v4di) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F + _mm512_aesdec_epi128(__m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_aesdec512((__v8di) __A, +              (__v8di) __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_aesenclast_epi128(__m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_aesenclast256((__v4di) __A, +              (__v4di) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F + _mm512_aesenclast_epi128(__m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_aesenclast512((__v8di) __A, +              (__v8di) __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_aesdeclast_epi128(__m256i __A, __m256i __B) +{ +  return (__m256i) __builtin_ia32_aesdeclast256((__v4di) __A, +              (__v4di) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F + _mm512_aesdeclast_epi128(__m512i __A, __m512i __B) +{ +  return (__m512i) __builtin_ia32_aesdeclast512((__v8di) __A, +              (__v8di) __B); +} + + +#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_F + +#endif diff --git a/lib/Headers/vpclmulqdqintrin.h b/lib/Headers/vpclmulqdqintrin.h new file mode 100644 index 000000000000..21cda2221007 --- /dev/null +++ b/lib/Headers/vpclmulqdqintrin.h @@ -0,0 +1,42 @@ +/*===------------ vpclmulqdqintrin.h - VPCLMULQDQ intrinsics ---------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <vpclmulqdqintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __VPCLMULQDQINTRIN_H +#define __VPCLMULQDQINTRIN_H + +#define _mm256_clmulepi64_epi128(A, B, I) __extension__ ({    \ +  (__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A),  \ +                                       (__v4di)(__m256i)(B),  \ +                                       (char)(I)); }) + +#define _mm512_clmulepi64_epi128(A, B, I) __extension__ ({    \ +  (__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A),  \ +                                       (__v8di)(__m512i)(B),  \ +                                       (char)(I)); }) + +#endif // __VPCLMULQDQINTRIN_H + diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp index 9fe4309ca124..2a999399fb50 100644 --- a/lib/Parse/ParseDecl.cpp +++ b/lib/Parse/ParseDecl.cpp @@ -1548,15 +1548,21 @@ void Parser::DiagnoseMisplacedCXX11Attribute(ParsedAttributesWithRange &Attrs,    SourceLocation Loc = Tok.getLocation();    ParseCXX11Attributes(Attrs);    CharSourceRange AttrRange(SourceRange(Loc, Attrs.Range.getEnd()), true); - +  // FIXME: use err_attributes_misplaced    Diag(Loc, diag::err_attributes_not_allowed)      << FixItHint::CreateInsertionFromRange(CorrectLocation, AttrRange)      << FixItHint::CreateRemoval(AttrRange);  } -void Parser::DiagnoseProhibitedAttributes(ParsedAttributesWithRange &attrs) { -  Diag(attrs.Range.getBegin(), diag::err_attributes_not_allowed) -    << attrs.Range; +void Parser::DiagnoseProhibitedAttributes(ParsedAttributesWithRange &attrs, +                                          const SourceLocation CorrectLocation) { +  if (CorrectLocation.isValid()) { +    CharSourceRange AttrRange(attrs.Range, true); +    Diag(CorrectLocation, diag::err_attributes_misplaced) +        << FixItHint::CreateInsertionFromRange(CorrectLocation, AttrRange) +        << FixItHint::CreateRemoval(AttrRange); +  } else +    Diag(attrs.Range.getBegin(), diag::err_attributes_not_allowed) << attrs.Range;  }  void Parser::ProhibitCXX11Attributes(ParsedAttributesWithRange &Attrs, diff --git a/lib/Parse/Parser.cpp b/lib/Parse/Parser.cpp index 72d653797c60..8aa50a2c7f2a 100644 --- a/lib/Parse/Parser.cpp +++ b/lib/Parse/Parser.cpp @@ -930,7 +930,31 @@ Parser::ParseDeclOrFunctionDefInternal(ParsedAttributesWithRange &attrs,    // C99 6.7.2.3p6: Handle "struct-or-union identifier;", "enum { X };"    // declaration-specifiers init-declarator-list[opt] ';'    if (Tok.is(tok::semi)) { -    ProhibitAttributes(attrs); +    auto LengthOfTSTToken = [](DeclSpec::TST TKind) { +      assert(DeclSpec::isDeclRep(TKind)); +      switch(TKind) { +      case DeclSpec::TST_class: +        return 5; +      case DeclSpec::TST_struct: +        return 6; +      case DeclSpec::TST_union: +        return 5; +      case DeclSpec::TST_enum: +        return 4; +      case DeclSpec::TST_interface: +        return 9; +      default: +        llvm_unreachable("we only expect to get the length of the class/struct/union/enum"); +      } +       +    }; +    // Suggest correct location to fix '[[attrib]] struct' to 'struct [[attrib]]' +    SourceLocation CorrectLocationForAttributes = +        DeclSpec::isDeclRep(DS.getTypeSpecType()) +            ? DS.getTypeSpecTypeLoc().getLocWithOffset( +                  LengthOfTSTToken(DS.getTypeSpecType())) +            : SourceLocation(); +    ProhibitAttributes(attrs, CorrectLocationForAttributes);      ConsumeToken();      RecordDecl *AnonRecord = nullptr;      Decl *TheDecl = Actions.ParsedFreeStandingDeclSpec(getCurScope(), AS_none, diff --git a/lib/Sema/SemaDeclCXX.cpp b/lib/Sema/SemaDeclCXX.cpp index f2fb95c39163..aa26b37f444d 100644 --- a/lib/Sema/SemaDeclCXX.cpp +++ b/lib/Sema/SemaDeclCXX.cpp @@ -12265,11 +12265,10 @@ void Sema::DefineImplicitLambdaToFunctionPointerConversion(    // Construct the body of the conversion function { return __invoke; }.    Expr *FunctionRef = BuildDeclRefExpr(Invoker, Invoker->getType(),                                          VK_LValue, Conv->getLocation()).get(); -   assert(FunctionRef && "Can't refer to __invoke function?"); -   Stmt *Return = BuildReturnStmt(Conv->getLocation(), FunctionRef).get(); -   Conv->setBody(new (Context) CompoundStmt(Context, Return, -                                            Conv->getLocation(), -                                            Conv->getLocation())); +  assert(FunctionRef && "Can't refer to __invoke function?"); +  Stmt *Return = BuildReturnStmt(Conv->getLocation(), FunctionRef).get(); +  Conv->setBody(CompoundStmt::Create(Context, Return, Conv->getLocation(), +                                     Conv->getLocation()));    Conv->markUsed(Context);    Conv->setReferenced(); @@ -12330,9 +12329,8 @@ void Sema::DefineImplicitLambdaToBlockPointerConversion(    // Set the body of the conversion function.    Stmt *ReturnS = Return.get(); -  Conv->setBody(new (Context) CompoundStmt(Context, ReturnS, -                                           Conv->getLocation(), -                                           Conv->getLocation())); +  Conv->setBody(CompoundStmt::Create(Context, ReturnS, Conv->getLocation(), +                                     Conv->getLocation()));    Conv->markUsed(Context);    // We're done; notify the mutation listener, if any. diff --git a/lib/Sema/SemaExprCXX.cpp b/lib/Sema/SemaExprCXX.cpp index 9c842ded1e10..cff9fbbf491b 100644 --- a/lib/Sema/SemaExprCXX.cpp +++ b/lib/Sema/SemaExprCXX.cpp @@ -6265,9 +6265,8 @@ Stmt *Sema::MaybeCreateStmtWithCleanups(Stmt *SubStmt) {    // a StmtExpr; currently this is only used for asm statements.    // This is hacky, either create a new CXXStmtWithTemporaries statement or    // a new AsmStmtWithTemporaries. -  CompoundStmt *CompStmt = new (Context) CompoundStmt(Context, SubStmt, -                                                      SourceLocation(), -                                                      SourceLocation()); +  CompoundStmt *CompStmt = CompoundStmt::Create( +      Context, SubStmt, SourceLocation(), SourceLocation());    Expr *E = new (Context) StmtExpr(CompStmt, Context.VoidTy, SourceLocation(),                                     SourceLocation());    return MaybeCreateExprWithCleanups(E); diff --git a/lib/Sema/SemaLookup.cpp b/lib/Sema/SemaLookup.cpp index d3f91a4e273d..a9db973851df 100644 --- a/lib/Sema/SemaLookup.cpp +++ b/lib/Sema/SemaLookup.cpp @@ -1589,7 +1589,7 @@ bool LookupResult::isVisibleSlow(Sema &SemaRef, NamedDecl *D) {      return false;    // Find the extra places where we need to look. -  llvm::DenseSet<Module*> &LookupModules = SemaRef.getLookupModules(); +  const auto &LookupModules = SemaRef.getLookupModules();    if (LookupModules.empty())      return false; @@ -1604,7 +1604,8 @@ bool LookupResult::isVisibleSlow(Sema &SemaRef, NamedDecl *D) {    // Check whether DeclModule is transitively exported to an import of    // the lookup set.    return std::any_of(LookupModules.begin(), LookupModules.end(), -                     [&](Module *M) { return M->isModuleVisible(DeclModule); }); +                     [&](const Module *M) { +                       return M->isModuleVisible(DeclModule); });  }  bool Sema::isVisibleSlow(const NamedDecl *D) { diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp index 0880b2d79060..24b58e8fd12b 100644 --- a/lib/Sema/SemaOpenMP.cpp +++ b/lib/Sema/SemaOpenMP.cpp @@ -1297,7 +1297,8 @@ bool Sema::IsOpenMPCapturedByRef(ValueDecl *D, unsigned Level) {              Level, /*NotLastprivate=*/true) &&          // If the variable is artificial and must be captured by value - try to          // capture by value. -        !(isa<OMPCapturedExprDecl>(D) && D->hasAttr<OMPCaptureKindAttr>()); +        !(isa<OMPCapturedExprDecl>(D) && !D->hasAttr<OMPCaptureNoInitAttr>() && +          !cast<OMPCapturedExprDecl>(D)->getInit()->isGLValue());    }    // When passing data by copy, we need to make sure it fits the uintptr size @@ -2326,7 +2327,6 @@ static OMPCapturedExprDecl *buildCaptureDecl(Sema &S, IdentifierInfo *Id,    ASTContext &C = S.getASTContext();    Expr *Init = AsExpression ? CaptureExpr : CaptureExpr->IgnoreImpCasts();    QualType Ty = Init->getType(); -  Attr *OMPCaptureKind = nullptr;    if (CaptureExpr->getObjectKind() == OK_Ordinary && CaptureExpr->isGLValue()) {      if (S.getLangOpts().CPlusPlus) {        Ty = C.getLValueReferenceType(Ty); @@ -2339,16 +2339,11 @@ static OMPCapturedExprDecl *buildCaptureDecl(Sema &S, IdentifierInfo *Id,        Init = Res.get();      }      WithInit = true; -  } else if (AsExpression) { -    // This variable must be captured by value. -    OMPCaptureKind = OMPCaptureKindAttr::CreateImplicit(C, OMPC_unknown);    }    auto *CED = OMPCapturedExprDecl::Create(C, S.CurContext, Id, Ty,                                            CaptureExpr->getLocStart());    if (!WithInit)      CED->addAttr(OMPCaptureNoInitAttr::CreateImplicit(C, SourceRange())); -  if (OMPCaptureKind) -    CED->addAttr(OMPCaptureKind);    S.CurContext->addHiddenDecl(CED);    S.AddInitializerToDecl(CED, Init, /*DirectInit=*/false);    return CED; @@ -7628,6 +7623,11 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(      case OMPD_teams_distribute_parallel_for_simd:        CaptureRegion = OMPD_teams;        break; +    case OMPD_target_update: +    case OMPD_target_enter_data: +    case OMPD_target_exit_data: +      CaptureRegion = OMPD_task; +      break;      case OMPD_cancel:      case OMPD_parallel:      case OMPD_parallel_sections: @@ -7644,9 +7644,6 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(      case OMPD_taskloop:      case OMPD_taskloop_simd:      case OMPD_target_data: -    case OMPD_target_enter_data: -    case OMPD_target_exit_data: -    case OMPD_target_update:        // Do not capture if-clause expressions.        break;      case OMPD_threadprivate: @@ -8007,15 +8004,17 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(      break;    case OMPC_device:      switch (DKind) { +    case OMPD_target_update: +    case OMPD_target_enter_data: +    case OMPD_target_exit_data: +      CaptureRegion = OMPD_task; +      break;      case OMPD_target_teams:      case OMPD_target_teams_distribute:      case OMPD_target_teams_distribute_simd:      case OMPD_target_teams_distribute_parallel_for:      case OMPD_target_teams_distribute_parallel_for_simd:      case OMPD_target_data: -    case OMPD_target_enter_data: -    case OMPD_target_exit_data: -    case OMPD_target_update:      case OMPD_target:      case OMPD_target_simd:      case OMPD_target_parallel: diff --git a/lib/Sema/SemaStmt.cpp b/lib/Sema/SemaStmt.cpp index ff0f4d995851..4474d62949a2 100644 --- a/lib/Sema/SemaStmt.cpp +++ b/lib/Sema/SemaStmt.cpp @@ -388,7 +388,7 @@ StmtResult Sema::ActOnCompoundStmt(SourceLocation L, SourceLocation R,        DiagnoseEmptyLoopBody(Elts[i], Elts[i + 1]);    } -  return new (Context) CompoundStmt(Context, Elts, L, R); +  return CompoundStmt::Create(Context, Elts, L, R);  }  StmtResult diff --git a/lib/Sema/SemaTemplateInstantiateDecl.cpp b/lib/Sema/SemaTemplateInstantiateDecl.cpp index 1deb8638756b..d8af8f34530b 100644 --- a/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -3932,22 +3932,22 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation,                                           TemplateArgs))        return; -    if (CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(Function)) { -      // If this is a constructor, instantiate the member initializers. -      InstantiateMemInitializers(Ctor, cast<CXXConstructorDecl>(PatternDecl), -                                 TemplateArgs); - -      // If this is an MS ABI dllexport default constructor, instantiate any -      // default arguments. -      if (Context.getTargetInfo().getCXXABI().isMicrosoft() && -          Ctor->isDefaultConstructor()) { -        InstantiateDefaultCtorDefaultArgs(*this, Ctor); -      } -    } -      if (PatternDecl->hasSkippedBody()) {        ActOnSkippedFunctionBody(Function);      } else { +      if (CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(Function)) { +        // If this is a constructor, instantiate the member initializers. +        InstantiateMemInitializers(Ctor, cast<CXXConstructorDecl>(PatternDecl), +                                   TemplateArgs); + +        // If this is an MS ABI dllexport default constructor, instantiate any +        // default arguments. +        if (Context.getTargetInfo().getCXXABI().isMicrosoft() && +            Ctor->isDefaultConstructor()) { +          InstantiateDefaultCtorDefaultArgs(*this, Ctor); +        } +      } +        // Instantiate the function body.        StmtResult Body = SubstStmt(Pattern, TemplateArgs); diff --git a/lib/Serialization/ASTReaderStmt.cpp b/lib/Serialization/ASTReaderStmt.cpp index 8ef1491eb2da..6163b811c769 100644 --- a/lib/Serialization/ASTReaderStmt.cpp +++ b/lib/Serialization/ASTReaderStmt.cpp @@ -119,7 +119,7 @@ void ASTStmtReader::VisitCompoundStmt(CompoundStmt *S) {    unsigned NumStmts = Record.readInt();    while (NumStmts--)      Stmts.push_back(Record.readSubStmt()); -  S->setStmts(Record.getContext(), Stmts); +  S->setStmts(Stmts);    S->LBraceLoc = ReadSourceLocation();    S->RBraceLoc = ReadSourceLocation();  } @@ -3081,7 +3081,8 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {        break;      case STMT_COMPOUND: -      S = new (Context) CompoundStmt(Empty); +      S = CompoundStmt::CreateEmpty( +          Context, /*NumStmts=*/Record[ASTStmtReader::NumStmtFields]);        break;      case STMT_CASE:  | 
