diff options
Diffstat (limited to 'clang/utils/TableGen/NeonEmitter.cpp')
-rw-r--r-- | clang/utils/TableGen/NeonEmitter.cpp | 411 |
1 files changed, 282 insertions, 129 deletions
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp index a0f3fb2ddc08..d5bf59ef04ad 100644 --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -27,8 +27,9 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" -#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" @@ -98,7 +99,8 @@ enum EltType { Poly128, Float16, Float32, - Float64 + Float64, + BFloat16 }; } // end namespace NeonTypeFlags @@ -146,6 +148,7 @@ private: SInt, UInt, Poly, + BFloat16, }; TypeKind Kind; bool Immediate, Constant, Pointer; @@ -198,6 +201,7 @@ public: bool isInt() const { return isInteger() && ElementBitwidth == 32; } bool isLong() const { return isInteger() && ElementBitwidth == 64; } bool isVoid() const { return Kind == Void; } + bool isBFloat16() const { return Kind == BFloat16; } unsigned getNumElements() const { return Bitwidth / ElementBitwidth; } unsigned getSizeInBits() const { return Bitwidth; } unsigned getElementSizeInBits() const { return ElementBitwidth; } @@ -238,6 +242,11 @@ public: NumVectors = 1; } + void make32BitElement() { + assert_with_loc(Bitwidth > 32, "Not enough bits to make it 32!"); + ElementBitwidth = 32; + } + void doubleLanes() { assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!"); Bitwidth = 128; @@ -297,14 +306,12 @@ public: /// The main grunt class. This represents an instantiation of an intrinsic with /// a particular typespec and prototype. class Intrinsic { - friend class DagEmitter; - /// The Record this intrinsic was created from. Record *R; /// The unmangled name. std::string Name; /// The input and output typespecs. InTS == OutTS except when - /// CartesianProductOfTypes is 1 - this is the case for vreinterpret. + /// CartesianProductWith is non-empty - this is the case for vreinterpret. TypeSpec OutTS, InTS; /// The base class kind. Most intrinsics use ClassS, which has full type /// info for integers (s32/u32). Some use ClassI, which doesn't care about @@ -337,7 +344,7 @@ class Intrinsic { /// The set of intrinsics that this intrinsic uses/requires. std::set<Intrinsic *> Dependencies; /// The "base type", which is Type('d', OutTS). InBaseType is only - /// different if CartesianProductOfTypes = 1 (for vreinterpret). + /// different if CartesianProductWith is non-empty (for vreinterpret). Type BaseType, InBaseType; /// The return variable. Variable RetVar; @@ -518,7 +525,8 @@ private: std::pair<Type, std::string> emitDagDupTyped(DagInit *DI); std::pair<Type, std::string> emitDagShuffle(DagInit *DI); std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast); - std::pair<Type, std::string> emitDagCall(DagInit *DI); + std::pair<Type, std::string> emitDagCall(DagInit *DI, + bool MatchMangledName); std::pair<Type, std::string> emitDagNameReplace(DagInit *DI); std::pair<Type, std::string> emitDagLiteral(DagInit *DI); std::pair<Type, std::string> emitDagOp(DagInit *DI); @@ -546,7 +554,8 @@ class NeonEmitter { public: /// Called by Intrinsic - this attempts to get an intrinsic that takes /// the given types as arguments. - Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types); + Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types, + Optional<std::string> MangledName); /// Called by Intrinsic - returns a globally-unique number. unsigned getUniqueNumber() { return UniqueNumber++; } @@ -577,8 +586,11 @@ public: // runFP16 - Emit arm_fp16.h.inc void runFP16(raw_ostream &o); - // runHeader - Emit all the __builtin prototypes used in arm_neon.h - // and arm_fp16.h + // runBF16 - Emit arm_bf16.h.inc + void runBF16(raw_ostream &o); + + // runHeader - Emit all the __builtin prototypes used in arm_neon.h, + // arm_fp16.h and arm_bf16.h void runHeader(raw_ostream &o); // runTests - Emit tests for all the Neon intrinsics. @@ -603,6 +615,8 @@ std::string Type::str() const { S += "poly"; else if (isFloating()) S += "float"; + else if (isBFloat16()) + S += "bfloat"; else S += "int"; @@ -642,7 +656,10 @@ std::string Type::builtin_str() const { case 128: S += "LLLi"; break; default: llvm_unreachable("Unhandled case!"); } - else + else if (isBFloat16()) { + assert(ElementBitwidth == 16 && "BFloat16 can only be 16 bits"); + S += "y"; + } else switch (ElementBitwidth) { case 16: S += "h"; break; case 32: S += "f"; break; @@ -696,6 +713,11 @@ unsigned Type::getNeonEnum() const { Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1); } + if (isBFloat16()) { + assert(Addend == 1 && "BFloat16 is only 16 bit"); + Base = (unsigned)NeonTypeFlags::BFloat16; + } + if (Bitwidth == 128) Base |= (unsigned)NeonTypeFlags::QuadFlag; if (isInteger() && !isSigned()) @@ -719,6 +741,9 @@ Type Type::fromTypedefName(StringRef Name) { } else if (Name.startswith("poly")) { T.Kind = Poly; Name = Name.drop_front(4); + } else if (Name.startswith("bfloat")) { + T.Kind = BFloat16; + Name = Name.drop_front(6); } else { assert(Name.startswith("int")); Name = Name.drop_front(3); @@ -817,6 +842,10 @@ void Type::applyTypespec(bool &Quad) { if (isPoly()) NumVectors = 0; break; + case 'b': + Kind = BFloat16; + ElementBitwidth = 16; + break; default: llvm_unreachable("Unhandled type code!"); } @@ -843,6 +872,10 @@ void Type::applyModifiers(StringRef Mods) { case 'U': Kind = UInt; break; + case 'B': + Kind = BFloat16; + ElementBitwidth = 16; + break; case 'F': Kind = Float; break; @@ -924,6 +957,9 @@ std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const { if (CK == ClassB) return ""; + if (T.isBFloat16()) + return "bf16"; + if (T.isPoly()) typeCode = 'p'; else if (T.isInteger()) @@ -961,7 +997,7 @@ std::string Intrinsic::getBuiltinTypeStr() { Type RetT = getReturnType(); if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() && - !RetT.isFloating()) + !RetT.isFloating() && !RetT.isBFloat16()) RetT.makeInteger(RetT.getElementSizeInBits(), false); // Since the return value must be one type, return a vector type of the @@ -1026,7 +1062,8 @@ std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const { std::string S = Name; if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" || - Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32") + Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32" || + Name == "vcvt_f32_bf16") return Name; if (!typeCode.empty()) { @@ -1257,7 +1294,7 @@ void Intrinsic::emitBodyAsBuiltinCall() { if (!getReturnType().isVoid() && !SRet) S += "(" + RetVar.getType().str() + ") "; - S += "__builtin_neon_" + mangleName(N, LocalCK) + "("; + S += "__builtin_neon_" + mangleName(std::string(N), LocalCK) + "("; if (SRet) S += "&" + RetVar.getName() + ", "; @@ -1383,8 +1420,8 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) { return emitDagSaveTemp(DI); if (Op == "op") return emitDagOp(DI); - if (Op == "call") - return emitDagCall(DI); + if (Op == "call" || Op == "call_mangled") + return emitDagCall(DI, Op == "call_mangled"); if (Op == "name_replace") return emitDagNameReplace(DI); if (Op == "literal") @@ -1398,25 +1435,26 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) { if (DI->getNumArgs() == 2) { // Unary op. std::pair<Type, std::string> R = - emitDagArg(DI->getArg(1), DI->getArgNameStr(1)); + emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); return std::make_pair(R.first, Op + R.second); } else { assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!"); std::pair<Type, std::string> R1 = - emitDagArg(DI->getArg(1), DI->getArgNameStr(1)); + emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); std::pair<Type, std::string> R2 = - emitDagArg(DI->getArg(2), DI->getArgNameStr(2)); + emitDagArg(DI->getArg(2), std::string(DI->getArgNameStr(2))); assert_with_loc(R1.first == R2.first, "Argument type mismatch!"); return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second); } } -std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCall(DagInit *DI) { +std::pair<Type, std::string> +Intrinsic::DagEmitter::emitDagCall(DagInit *DI, bool MatchMangledName) { std::vector<Type> Types; std::vector<std::string> Values; for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { std::pair<Type, std::string> R = - emitDagArg(DI->getArg(I + 1), DI->getArgNameStr(I + 1)); + emitDagArg(DI->getArg(I + 1), std::string(DI->getArgNameStr(I + 1))); Types.push_back(R.first); Values.push_back(R.second); } @@ -1427,7 +1465,13 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCall(DagInit *DI) { N = SI->getAsUnquotedString(); else N = emitDagArg(DI->getArg(0), "").second; - Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types); + Optional<std::string> MangledName; + if (MatchMangledName) { + if (Intr.getRecord()->getValueAsBit("isLaneQ")) + N += "q"; + MangledName = Intr.mangleName(N, ClassS); + } + Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types, MangledName); // Make sure the callee is known as an early def. Callee.setNeededEarly(); @@ -1451,9 +1495,9 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCall(DagInit *DI) { std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI, bool IsBitCast){ // (cast MOD* VAL) -> cast VAL to type given by MOD. - std::pair<Type, std::string> R = emitDagArg( - DI->getArg(DI->getNumArgs() - 1), - DI->getArgNameStr(DI->getNumArgs() - 1)); + std::pair<Type, std::string> R = + emitDagArg(DI->getArg(DI->getNumArgs() - 1), + std::string(DI->getArgNameStr(DI->getNumArgs() - 1))); Type castToType = R.first; for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) { @@ -1465,10 +1509,11 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI, // 5. The value "H" or "D" to half or double the bitwidth. // 6. The value "8" to convert to 8-bit (signed) integer lanes. if (!DI->getArgNameStr(ArgIdx).empty()) { - assert_with_loc(Intr.Variables.find(DI->getArgNameStr(ArgIdx)) != - Intr.Variables.end(), + assert_with_loc(Intr.Variables.find(std::string( + DI->getArgNameStr(ArgIdx))) != Intr.Variables.end(), "Variable not found"); - castToType = Intr.Variables[DI->getArgNameStr(ArgIdx)].getType(); + castToType = + Intr.Variables[std::string(DI->getArgNameStr(ArgIdx))].getType(); } else { StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx)); assert_with_loc(SI, "Expected string type or $Name for cast type"); @@ -1485,6 +1530,8 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI, castToType.doubleLanes(); } else if (SI->getAsUnquotedString() == "8") { castToType.makeInteger(8, true); + } else if (SI->getAsUnquotedString() == "32") { + castToType.make32BitElement(); } else { castToType = Type::fromTypedefName(SI->getAsUnquotedString()); assert_with_loc(!castToType.isVoid(), "Unknown typedef"); @@ -1583,9 +1630,9 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){ // (shuffle arg1, arg2, sequence) std::pair<Type, std::string> Arg1 = - emitDagArg(DI->getArg(0), DI->getArgNameStr(0)); + emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); std::pair<Type, std::string> Arg2 = - emitDagArg(DI->getArg(1), DI->getArgNameStr(1)); + emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); assert_with_loc(Arg1.first == Arg2.first, "Different types in arguments to shuffle!"); @@ -1627,8 +1674,8 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) { assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument"); - std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), - DI->getArgNameStr(0)); + std::pair<Type, std::string> A = + emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument"); Type T = Intr.getBaseType(); @@ -1646,10 +1693,10 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) { std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDupTyped(DagInit *DI) { assert_with_loc(DI->getNumArgs() == 2, "dup_typed() expects two arguments"); - std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), - DI->getArgNameStr(0)); - std::pair<Type, std::string> B = emitDagArg(DI->getArg(1), - DI->getArgNameStr(1)); + std::pair<Type, std::string> A = + emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); + std::pair<Type, std::string> B = + emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); assert_with_loc(B.first.isScalar(), "dup_typed() requires a scalar as the second argument"); @@ -1668,10 +1715,10 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDupTyped(DagInit *DI) std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) { assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments"); - std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), - DI->getArgNameStr(0)); - std::pair<Type, std::string> B = emitDagArg(DI->getArg(1), - DI->getArgNameStr(1)); + std::pair<Type, std::string> A = + emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); + std::pair<Type, std::string> B = + emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); assert_with_loc(B.first.isScalar(), "splat() requires a scalar int as the second argument"); @@ -1687,13 +1734,13 @@ std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) { std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) { assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments"); - std::pair<Type, std::string> A = emitDagArg(DI->getArg(1), - DI->getArgNameStr(1)); + std::pair<Type, std::string> A = + emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); assert_with_loc(!A.first.isVoid(), "Argument to save_temp() must have non-void type!"); - std::string N = DI->getArgNameStr(0); + std::string N = std::string(DI->getArgNameStr(0)); assert_with_loc(!N.empty(), "save_temp() expects a name as the first argument"); @@ -1831,7 +1878,8 @@ void Intrinsic::indexBody() { // NeonEmitter implementation //===----------------------------------------------------------------------===// -Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types) { +Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types, + Optional<std::string> MangledName) { // First, look up the name in the intrinsic map. assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(), ("Intrinsic '" + Name + "' not found!").str()); @@ -1860,17 +1908,19 @@ Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types) { } ErrMsg += ")\n"; + if (MangledName && MangledName != I.getMangledName(true)) + continue; + if (I.getNumParams() != Types.size()) continue; - bool Good = true; - for (unsigned Arg = 0; Arg < Types.size(); ++Arg) { - if (I.getParamType(Arg) != Types[Arg]) { - Good = false; - break; - } - } - if (Good) + unsigned ArgNum = 0; + bool MatchingArgumentTypes = + std::all_of(Types.begin(), Types.end(), [&](const auto &Type) { + return Type == I.getParamType(ArgNum++); + }); + + if (MatchingArgumentTypes) GoodVec.push_back(&I); } @@ -1883,14 +1933,14 @@ Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types) { void NeonEmitter::createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out) { - std::string Name = R->getValueAsString("Name"); - std::string Proto = R->getValueAsString("Prototype"); - std::string Types = R->getValueAsString("Types"); + std::string Name = std::string(R->getValueAsString("Name")); + std::string Proto = std::string(R->getValueAsString("Prototype")); + std::string Types = std::string(R->getValueAsString("Types")); Record *OperationRec = R->getValueAsDef("Operation"); - bool CartesianProductOfTypes = R->getValueAsBit("CartesianProductOfTypes"); bool BigEndianSafe = R->getValueAsBit("BigEndianSafe"); - std::string Guard = R->getValueAsString("ArchGuard"); + std::string Guard = std::string(R->getValueAsString("ArchGuard")); bool IsUnavailable = OperationRec->getValueAsBit("Unavailable"); + std::string CartesianProductWith = std::string(R->getValueAsString("CartesianProductWith")); // Set the global current record. This allows assert_with_loc to produce // decent location information even when highly nested. @@ -1905,17 +1955,20 @@ void NeonEmitter::createIntrinsic(Record *R, CK = ClassMap[R->getSuperClasses()[1].first]; std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs; - for (auto TS : TypeSpecs) { - if (CartesianProductOfTypes) { + if (!CartesianProductWith.empty()) { + std::vector<TypeSpec> ProductTypeSpecs = TypeSpec::fromTypeSpecs(CartesianProductWith); + for (auto TS : TypeSpecs) { Type DefaultT(TS, "."); - for (auto SrcTS : TypeSpecs) { + for (auto SrcTS : ProductTypeSpecs) { Type DefaultSrcT(SrcTS, "."); if (TS == SrcTS || DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits()) continue; NewTypeSpecs.push_back(std::make_pair(TS, SrcTS)); } - } else { + } + } else { + for (auto TS : TypeSpecs) { NewTypeSpecs.push_back(std::make_pair(TS, TS)); } } @@ -2143,6 +2196,74 @@ void NeonEmitter::runHeader(raw_ostream &OS) { genIntrinsicRangeCheckCode(OS, Defs); } +static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) { + std::string TypedefTypes(types); + std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes); + + // Emit vector typedefs. + bool InIfdef = false; + for (auto &TS : TDTypeVec) { + bool IsA64 = false; + Type T(TS, "."); + if (T.isDouble()) + IsA64 = true; + + if (InIfdef && !IsA64) { + OS << "#endif\n"; + InIfdef = false; + } + if (!InIfdef && IsA64) { + OS << "#ifdef __aarch64__\n"; + InIfdef = true; + } + + if (T.isPoly()) + OS << "typedef __attribute__((neon_polyvector_type("; + else + OS << "typedef __attribute__((neon_vector_type("; + + Type T2 = T; + T2.makeScalar(); + OS << T.getNumElements() << "))) "; + OS << T2.str(); + OS << " " << T.str() << ";\n"; + } + if (InIfdef) + OS << "#endif\n"; + OS << "\n"; + + // Emit struct typedefs. + InIfdef = false; + for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) { + for (auto &TS : TDTypeVec) { + bool IsA64 = false; + Type T(TS, "."); + if (T.isDouble()) + IsA64 = true; + + if (InIfdef && !IsA64) { + OS << "#endif\n"; + InIfdef = false; + } + if (!InIfdef && IsA64) { + OS << "#ifdef __aarch64__\n"; + InIfdef = true; + } + + const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0}; + Type VT(TS, Mods); + OS << "typedef struct " << VT.str() << " {\n"; + OS << " " << T.str() << " val"; + OS << "[" << NumMembers << "]"; + OS << ";\n} "; + OS << VT.str() << ";\n"; + OS << "\n"; + } + } + if (InIfdef) + OS << "#endif\n"; +} + /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h /// is comprised of type definitions and function declarations. void NeonEmitter::run(raw_ostream &OS) { @@ -2191,12 +2312,22 @@ void NeonEmitter::run(raw_ostream &OS) { OS << "#ifndef __ARM_NEON_H\n"; OS << "#define __ARM_NEON_H\n\n"; + OS << "#ifndef __ARM_FP\n"; + OS << "#error \"NEON intrinsics not available with the soft-float ABI. " + "Please use -mfloat-abi=softfp or -mfloat-abi=hard\"\n"; + OS << "#else\n\n"; + OS << "#if !defined(__ARM_NEON)\n"; OS << "#error \"NEON support not enabled\"\n"; - OS << "#endif\n\n"; + OS << "#else\n\n"; OS << "#include <stdint.h>\n\n"; + OS << "#ifdef __ARM_FEATURE_BF16\n"; + OS << "#include <arm_bf16.h>\n"; + OS << "typedef __bf16 bfloat16_t;\n"; + OS << "#endif\n\n"; + // Emit NEON-specific scalar typedefs. OS << "typedef float float32_t;\n"; OS << "typedef __fp16 float16_t;\n"; @@ -2214,76 +2345,14 @@ void NeonEmitter::run(raw_ostream &OS) { OS << "#else\n"; OS << "typedef int8_t poly8_t;\n"; OS << "typedef int16_t poly16_t;\n"; + OS << "typedef int64_t poly64_t;\n"; OS << "#endif\n"; - // Emit Neon vector typedefs. - std::string TypedefTypes( - "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl"); - std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes); - - // Emit vector typedefs. - bool InIfdef = false; - for (auto &TS : TDTypeVec) { - bool IsA64 = false; - Type T(TS, "."); - if (T.isDouble() || (T.isPoly() && T.getElementSizeInBits() == 64)) - IsA64 = true; - - if (InIfdef && !IsA64) { - OS << "#endif\n"; - InIfdef = false; - } - if (!InIfdef && IsA64) { - OS << "#ifdef __aarch64__\n"; - InIfdef = true; - } + emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl", OS); - if (T.isPoly()) - OS << "typedef __attribute__((neon_polyvector_type("; - else - OS << "typedef __attribute__((neon_vector_type("; - - Type T2 = T; - T2.makeScalar(); - OS << T.getNumElements() << "))) "; - OS << T2.str(); - OS << " " << T.str() << ";\n"; - } - if (InIfdef) - OS << "#endif\n"; - OS << "\n"; - - // Emit struct typedefs. - InIfdef = false; - for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) { - for (auto &TS : TDTypeVec) { - bool IsA64 = false; - Type T(TS, "."); - if (T.isDouble() || (T.isPoly() && T.getElementSizeInBits() == 64)) - IsA64 = true; - - if (InIfdef && !IsA64) { - OS << "#endif\n"; - InIfdef = false; - } - if (!InIfdef && IsA64) { - OS << "#ifdef __aarch64__\n"; - InIfdef = true; - } - - const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0}; - Type VT(TS, Mods); - OS << "typedef struct " << VT.str() << " {\n"; - OS << " " << T.str() << " val"; - OS << "[" << NumMembers << "]"; - OS << ";\n} "; - OS << VT.str() << ";\n"; - OS << "\n"; - } - } - if (InIfdef) - OS << "#endif\n"; - OS << "\n"; + OS << "#ifdef __ARM_FEATURE_BF16\n"; + emitNeonTypeDefs("bQb", OS); + OS << "#endif\n\n"; OS << "#define __ai static __inline__ __attribute__((__always_inline__, " "__nodebug__))\n\n"; @@ -2340,6 +2409,8 @@ void NeonEmitter::run(raw_ostream &OS) { OS << "\n"; OS << "#undef __ai\n\n"; + OS << "#endif /* if !defined(__ARM_NEON) */\n"; + OS << "#endif /* ifndef __ARM_FP */\n"; OS << "#endif /* __ARM_NEON_H */\n"; } @@ -2450,6 +2521,84 @@ void NeonEmitter::runFP16(raw_ostream &OS) { OS << "#endif /* __ARM_FP16_H */\n"; } +void NeonEmitter::runBF16(raw_ostream &OS) { + OS << "/*===---- arm_bf16.h - ARM BF16 intrinsics " + "-----------------------------------===\n" + " *\n" + " *\n" + " * Part of the LLVM Project, under the Apache License v2.0 with LLVM " + "Exceptions.\n" + " * See https://llvm.org/LICENSE.txt for license information.\n" + " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n" + " *\n" + " *===-----------------------------------------------------------------" + "------===\n" + " */\n\n"; + + OS << "#ifndef __ARM_BF16_H\n"; + OS << "#define __ARM_BF16_H\n\n"; + + OS << "typedef __bf16 bfloat16_t;\n"; + + OS << "#define __ai static __inline__ __attribute__((__always_inline__, " + "__nodebug__))\n\n"; + + SmallVector<Intrinsic *, 128> Defs; + std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); + for (auto *R : RV) + createIntrinsic(R, Defs); + + for (auto *I : Defs) + I->indexBody(); + + llvm::stable_sort(Defs, llvm::deref<std::less<>>()); + + // Only emit a def when its requirements have been met. + // FIXME: This loop could be made faster, but it's fast enough for now. + bool MadeProgress = true; + std::string InGuard; + while (!Defs.empty() && MadeProgress) { + MadeProgress = false; + + for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); + I != Defs.end(); /*No step*/) { + bool DependenciesSatisfied = true; + for (auto *II : (*I)->getDependencies()) { + if (llvm::is_contained(Defs, II)) + DependenciesSatisfied = false; + } + if (!DependenciesSatisfied) { + // Try the next one. + ++I; + continue; + } + + // Emit #endif/#if pair if needed. + if ((*I)->getGuard() != InGuard) { + if (!InGuard.empty()) + OS << "#endif\n"; + InGuard = (*I)->getGuard(); + if (!InGuard.empty()) + OS << "#if " << InGuard << "\n"; + } + + // Actually generate the intrinsic code. + OS << (*I)->generate(); + + MadeProgress = true; + I = Defs.erase(I); + } + } + assert(Defs.empty() && "Some requirements were not satisfied!"); + if (!InGuard.empty()) + OS << "#endif\n"; + + OS << "\n"; + OS << "#undef __ai\n\n"; + + OS << "#endif\n"; +} + void clang::EmitNeon(RecordKeeper &Records, raw_ostream &OS) { NeonEmitter(Records).run(OS); } @@ -2458,6 +2607,10 @@ void clang::EmitFP16(RecordKeeper &Records, raw_ostream &OS) { NeonEmitter(Records).runFP16(OS); } +void clang::EmitBF16(RecordKeeper &Records, raw_ostream &OS) { + NeonEmitter(Records).runBF16(OS); +} + void clang::EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { NeonEmitter(Records).runHeader(OS); } |